Diffstat (limited to 'arch')
982 files changed, 16827 insertions, 14933 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index c47b328eada0..e8d19c3cb91f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -260,6 +260,14 @@ config ARCH_HAS_SET_MEMORY config ARCH_HAS_SET_DIRECT_MAP bool +# +# Select if arch has an uncached kernel segment and provides the +# uncached_kernel_address / cached_kernel_address symbols to use it +# +config ARCH_HAS_UNCACHED_SEGMENT + select ARCH_HAS_DMA_PREP_COHERENT + bool + # Select if arch init_task must go in the __init_task_data section config ARCH_TASK_STRUCT_ON_STACK bool diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile index b3314e0dcb6f..12dee59b011c 100644 --- a/arch/alpha/Makefile +++ b/arch/alpha/Makefile @@ -8,8 +8,6 @@ # Copyright (C) 1994 by Linus Torvalds # -KBUILD_DEFCONFIG := defconfig - NM := $(NM) -B LDFLAGS_vmlinux := -static -N #-relax diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 150a1c5d6a2c..2144530d1428 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -93,9 +93,9 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ } #define ATOMIC64_OP(op, asm_op) \ -static __inline__ void atomic64_##op(long i, atomic64_t * v) \ +static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \ { \ - unsigned long temp; \ + s64 temp; \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op " %0,%2,%0\n" \ @@ -109,9 +109,9 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } \ #define ATOMIC64_OP_RETURN(op, asm_op) \ -static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ +static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \ { \ - long temp, result; \ + s64 temp, result; \ __asm__ __volatile__( \ "1: ldq_l %0,%1\n" \ " " #asm_op " %0,%3,%2\n" \ @@ -128,9 +128,9 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ } #define ATOMIC64_FETCH_OP(op, asm_op) \ -static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ { \ - long temp, result; \ + s64 temp, result; \ __asm__ __volatile__( \ "1: ldq_l %2,%1\n" \ " " #asm_op " %2,%3,%0\n" \ @@ -246,9 +246,9 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. */ -static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) +static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long c, new, old; + s64 c, new, old; smp_mb(); __asm__ __volatile__( "1: ldq_l %[old],%[mem]\n" @@ -276,9 +276,9 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) * The function returns the old value of *v minus 1, even if * the atomic variable, v, was not decremented. */ -static inline long atomic64_dec_if_positive(atomic64_t *v) +static inline s64 atomic64_dec_if_positive(atomic64_t *v) { - long old, tmp; + s64 old, tmp; smp_mb(); __asm__ __volatile__( "1: ldq_l %[old],%[mem]\n" diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index 02f9f91bb4f0..71ded3b7d82d 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -5,6 +5,8 @@ #include <linux/mm.h> #include <linux/mmzone.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + /* * Allocate and free page tables. 
The xxx_kernel() versions are * used to allocate a kernel page table - this turns on ASN bits @@ -41,7 +43,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd) static inline pmd_t * pmd_alloc_one(struct mm_struct *mm, unsigned long address) { - pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); + pmd_t *ret = (pmd_t *)__get_free_page(GFP_PGTABLE_USER); return ret; } @@ -51,42 +53,6 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd) free_page((unsigned long)pmd); } -static inline pte_t * -pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); - return pte; -} - -static inline void -pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline pgtable_t -pte_alloc_one(struct mm_struct *mm) -{ - pte_t *pte = pte_alloc_one_kernel(mm); - struct page *page; - - if (!pte) - return NULL; - page = virt_to_page(pte); - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; -} - -static inline void -pte_free(struct mm_struct *mm, pgtable_t page) -{ - pgtable_page_dtor(page); - __free_page(page); -} - #define check_pgt_cache() do { } while (0) #endif /* _ALPHA_PGALLOC_H */ diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 976e89b116e5..de6c4df61082 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -122,6 +122,8 @@ #define SO_RCVTIMEO_NEW 66 #define SO_SNDTIMEO_NEW 67 +#define SO_DETACH_REUSEPORT_BPF 68 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 33e904a05881..a813020d2f11 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -225,7 +225,7 @@ do_sigreturn(struct sigcontext __user *sc) return; give_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } asmlinkage void @@ -253,7 +253,7 @@ do_rt_sigreturn(struct rt_sigframe __user *frame) return; give_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index d0dccae53ba9..5f90df30be20 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -614,8 +614,7 @@ void smp_imb(void) { /* Must wait other processors to flush their icache before continue. */ - if (on_each_cpu(ipi_imb, NULL, 1)) - printk(KERN_CRIT "smp_imb: timed out\n"); + on_each_cpu(ipi_imb, NULL, 1); } EXPORT_SYMBOL(smp_imb); @@ -630,9 +629,7 @@ flush_tlb_all(void) { /* Although we don't have any data to pass, we do want to synchronize with the other processors. 
*/ - if (on_each_cpu(ipi_flush_tlb_all, NULL, 1)) { - printk(KERN_CRIT "flush_tlb_all: timed out\n"); - } + on_each_cpu(ipi_flush_tlb_all, NULL, 1); } #define asn_locked() (cpu_data[smp_processor_id()].asn_lock) @@ -667,9 +664,7 @@ flush_tlb_mm(struct mm_struct *mm) } } - if (smp_call_function(ipi_flush_tlb_mm, mm, 1)) { - printk(KERN_CRIT "flush_tlb_mm: timed out\n"); - } + smp_call_function(ipi_flush_tlb_mm, mm, 1); preempt_enable(); } @@ -720,9 +715,7 @@ flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) data.mm = mm; data.addr = addr; - if (smp_call_function(ipi_flush_tlb_page, &data, 1)) { - printk(KERN_CRIT "flush_tlb_page: timed out\n"); - } + smp_call_function(ipi_flush_tlb_page, &data, 1); preempt_enable(); } @@ -772,9 +765,7 @@ flush_icache_user_range(struct vm_area_struct *vma, struct page *page, } } - if (smp_call_function(ipi_flush_icache_page, mm, 1)) { - printk(KERN_CRIT "flush_icache_page: timed out\n"); - } + smp_call_function(ipi_flush_icache_page, mm, 1); preempt_enable(); } diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 9e7704e44f6d..1db9bbcfb84e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -473,3 +473,4 @@ 541 common fsconfig sys_fsconfig 542 common fsmount sys_fsmount 543 common fspick sys_fspick +544 common pidfd_open sys_pidfd_open diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index bc9627698796..f6b9664ac504 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -402,7 +402,7 @@ do_entDbg(struct pt_regs *regs) { die_if_kernel("Instruction fault", regs, 0, NULL); - force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0); } diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 188fc9256baf..741e61ef9d3f 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -221,13 +221,13 @@ retry: up_read(&mm->mmap_sem); /* Send a sigbus, regardless of whether we were in kernel or user mode. */ - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0); if (!user_mode(regs)) goto no_context; return; do_sigsegv: - force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0, current); + force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0); return; #ifdef CONFIG_ALPHA_LARGE_VMALLOC diff --git a/arch/alpha/oprofile/common.c b/arch/alpha/oprofile/common.c index 310a4ce1dccc..1b1259c7d7d1 100644 --- a/arch/alpha/oprofile/common.c +++ b/arch/alpha/oprofile/common.c @@ -65,7 +65,7 @@ op_axp_setup(void) model->reg_setup(®, ctr, &sys); /* Configure the registers on all cpus. 
*/ - (void)smp_call_function(model->cpu_setup, ®, 1); + smp_call_function(model->cpu_setup, ®, 1); model->cpu_setup(®); return 0; } @@ -86,7 +86,7 @@ op_axp_cpu_start(void *dummy) static int op_axp_start(void) { - (void)smp_call_function(op_axp_cpu_start, NULL, 1); + smp_call_function(op_axp_cpu_start, NULL, 1); op_axp_cpu_start(NULL); return 0; } @@ -101,7 +101,7 @@ op_axp_cpu_stop(void *dummy) static void op_axp_stop(void) { - (void)smp_call_function(op_axp_cpu_stop, NULL, 1); + smp_call_function(op_axp_cpu_stop, NULL, 1); op_axp_cpu_stop(NULL); } diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 1c8137e7247b..8383155c8c82 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -7,6 +7,7 @@ config ARC def_bool y select ARC_TIMERS select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS select ARCH_HAS_SYNC_DMA_FOR_CPU @@ -16,6 +17,7 @@ config ARC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select DMA_DIRECT_REMAP select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 480af1af9e63..ee6d1184c2b1 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -5,6 +5,10 @@ KBUILD_DEFCONFIG := nsim_hs_defconfig +ifeq ($(CROSS_COMPILE),) +CROSS_COMPILE := $(call cc-cross-prefix, arc-linux- arceb-linux-) +endif + cflags-y += -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__ cflags-$(CONFIG_ISA_ARCOMPACT) += -mA7 cflags-$(CONFIG_ISA_ARCV2) += -mcpu=hs38 @@ -15,7 +19,7 @@ ifdef CONFIG_ARC_CURR_IN_REG # any kernel headers, and missing the r25 global register # Can't do unconditionally because of recursive include issues # due to <linux/thread_info.h> -LINUXINCLUDE += -include ${src}/arch/arc/include/asm/current.h +LINUXINCLUDE += -include $(srctree)/arch/arc/include/asm/current.h endif cflags-y += -fsection-anchors diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig index 5b5119d2b5d5..dc739bd093e3 100644 --- a/arch/arc/configs/tb10x_defconfig +++ b/arch/arc/configs/tb10x_defconfig @@ -94,6 +94,7 @@ CONFIG_CONFIGFS_FS=y CONFIG_DEBUG_INFO=y CONFIG_STRIP_ASM_SYMS=y CONFIG_DEBUG_FS=y +CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 17cf1c657cb3..7298ce84762e 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -321,14 +321,14 @@ ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3) */ typedef struct { - aligned_u64 counter; + s64 __aligned(8) counter; } atomic64_t; #define ATOMIC64_INIT(a) { (a) } -static inline long long atomic64_read(const atomic64_t *v) +static inline s64 atomic64_read(const atomic64_t *v) { - unsigned long long val; + s64 val; __asm__ __volatile__( " ldd %0, [%1] \n" @@ -338,7 +338,7 @@ static inline long long atomic64_read(const atomic64_t *v) return val; } -static inline void atomic64_set(atomic64_t *v, long long a) +static inline void atomic64_set(atomic64_t *v, s64 a) { /* * This could have been a simple assignment in "C" but would need @@ -359,9 +359,9 @@ static inline void atomic64_set(atomic64_t *v, long long a) } #define ATOMIC64_OP(op, op1, op2) \ -static inline void atomic64_##op(long long a, atomic64_t *v) \ +static inline void atomic64_##op(s64 a, atomic64_t *v) \ { \ - unsigned long long val; \ + s64 val; \ \ __asm__ __volatile__( \ "1: \n" \ @@ 
-372,13 +372,13 @@ static inline void atomic64_##op(long long a, atomic64_t *v) \ " bnz 1b \n" \ : "=&r"(val) \ : "r"(&v->counter), "ir"(a) \ - : "cc"); \ + : "cc"); \ } \ #define ATOMIC64_OP_RETURN(op, op1, op2) \ -static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \ +static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v) \ { \ - unsigned long long val; \ + s64 val; \ \ smp_mb(); \ \ @@ -399,9 +399,9 @@ static inline long long atomic64_##op##_return(long long a, atomic64_t *v) \ } #define ATOMIC64_FETCH_OP(op, op1, op2) \ -static inline long long atomic64_fetch_##op(long long a, atomic64_t *v) \ +static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v) \ { \ - unsigned long long val, orig; \ + s64 val, orig; \ \ smp_mb(); \ \ @@ -441,10 +441,10 @@ ATOMIC64_OPS(xor, xor, xor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline long long -atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new) +static inline s64 +atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) { - long long prev; + s64 prev; smp_mb(); @@ -464,9 +464,9 @@ atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new) return prev; } -static inline long long atomic64_xchg(atomic64_t *ptr, long long new) +static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new) { - long long prev; + s64 prev; smp_mb(); @@ -492,9 +492,9 @@ static inline long long atomic64_xchg(atomic64_t *ptr, long long new) * the atomic variable, v, was not decremented. */ -static inline long long atomic64_dec_if_positive(atomic64_t *v) +static inline s64 atomic64_dec_if_positive(atomic64_t *v) { - long long val; + s64 val; smp_mb(); @@ -525,10 +525,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) * Atomically adds @a to @v, if it was not @u. 
* Returns the old value of @v */ -static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, - long long u) +static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long long old, temp; + s64 old, temp; smp_mb(); diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index ff321f7df716..e1889ce3faf9 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -97,7 +97,7 @@ fault: goto again; fail: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return ret; } @@ -310,7 +310,7 @@ int elf_check_arch(const struct elf32_hdr *x) eflags = x->e_flags; if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) { pr_err("ABI mismatch - you need newer toolchain\n"); - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); return 0; } diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index b895f889602a..3d57ed0d8535 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -194,7 +194,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return regs->r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index e9a5b259f405..57235e5c0cea 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -47,7 +47,7 @@ unhandled_exception(const char *str, struct pt_regs *regs, tsk->thread.fault_address = (__force unsigned int)addr; - force_sig_fault(signo, si_code, addr, tsk); + force_sig_fault(signo, si_code, addr); } else { /* If not due to copy_(to|from)_user, we are doomed */ diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 0bf1468c35a3..62c210e7ee4c 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -8,51 +8,15 @@ #include <asm/cacheflush.h> /* - * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c) + * ARCH specific callbacks for generic noncoherent DMA ops * - hardware IOC not available (or "dma-coherent" not set for device in DT) * - But still handle both coherent and non-coherent requests from caller * * For DMA coherent hardware (IOC) generic code suffices */ -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - unsigned long order = get_order(size); - struct page *page; - phys_addr_t paddr; - void *kvaddr; - bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT); - - /* - * __GFP_HIGHMEM flag is cleared by upper layer functions - * (in include/linux/dma-mapping.h) so we should never get a - * __GFP_HIGHMEM here. - */ - BUG_ON(gfp & __GFP_HIGHMEM); - - page = alloc_pages(gfp | __GFP_ZERO, order); - if (!page) - return NULL; - - /* This is linear addr (0x8000_0000 based) */ - paddr = page_to_phys(page); - - *dma_handle = paddr; - - /* - * A coherent buffer needs MMU mapping to enforce non-cachability. - * kvaddr is kernel Virtual address (0x7000_0000 based). - */ - if (need_coh) { - kvaddr = ioremap_nocache(paddr, size); - if (kvaddr == NULL) { - __free_pages(page, order); - return NULL; - } - } else { - kvaddr = (void *)(u32)paddr; - } +void arch_dma_prep_coherent(struct page *page, size_t size) +{ /* * Evict any existing L1 and/or L2 lines for the backing page * in case it was used earlier as a normal "cached" page. 
@@ -63,28 +27,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, * Currently flush_cache_vmap nukes the L1 cache completely which * will be optimized as a separate commit */ - if (need_coh) - dma_cache_wback_inv(paddr, size); - - return kvaddr; -} - -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - phys_addr_t paddr = dma_handle; - struct page *page = virt_to_page(paddr); - - if (!(attrs & DMA_ATTR_NON_CONSISTENT)) - iounmap((void __force __iomem *)vaddr); - - __free_pages(page, get_order(size)); -} - -long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, - dma_addr_t dma_addr) -{ - return __phys_to_pfn(dma_addr); + dma_cache_wback_inv(page_to_phys(page), size); } /* @@ -161,3 +104,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev_info(dev, "use %sncoherent DMA ops\n", dev->dma_coherent ? "" : "non"); } + +static int __init atomic_pool_init(void) +{ + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); +} +postcore_initcall(atomic_pool_init); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 8cca03480bb2..81e84426fe21 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -196,7 +196,7 @@ bad_area: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { tsk->thread.fault_address = address; - force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -231,5 +231,5 @@ do_sigbus: goto no_context; tsk->thread.fault_address = address; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index 2eaecfb063a7..a376a50d3fea 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # menuconfig ARC_PLAT_EZNPS diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 6a91a742ab3d..7dd2dd335cf6 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -32,8 +32,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu) #define ARC_PERIPHERAL_BASE 0xf0000000 #define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) -#define CREG_PAE (CREG_BASE + 0x180) -#define CREG_PAE_UPDATE (CREG_BASE + 0x194) #define SDIO_BASE (ARC_PERIPHERAL_BASE + 0xA000) #define SDIO_UHS_REG_EXT (SDIO_BASE + 0x108) @@ -99,20 +97,167 @@ static void __init hsdk_enable_gpio_intc_wire(void) iowrite32(GPIO_INT_CONNECTED_MASK, (void __iomem *) GPIO_INTEN); } -static void __init hsdk_init_early(void) +enum hsdk_axi_masters { + M_HS_CORE = 0, + M_HS_RTT, + M_AXI_TUN, + M_HDMI_VIDEO, + M_HDMI_AUDIO, + M_USB_HOST, + M_ETHERNET, + M_SDIO, + M_GPU, + M_DMAC_0, + M_DMAC_1, + M_DVFS +}; + +#define UPDATE_VAL 1 + +/* + * This is modified configuration of AXI bridge. Default settings + * are specified in "Table 111 CREG Address Decoder register reset values". + * + * AXI_M_m_SLV{0|1} - Slave Select register for master 'm'. 
+ * Possible slaves are: + * - 0 => no slave selected + * - 1 => DDR controller port #1 + * - 2 => SRAM controller + * - 3 => AXI tunnel + * - 4 => EBI controller + * - 5 => ROM controller + * - 6 => AXI2APB bridge + * - 7 => DDR controller port #2 + * - 8 => DDR controller port #3 + * - 9 => HS38x4 IOC + * - 10 => HS38x4 DMI + * AXI_M_m_OFFSET{0|1} - Addr Offset register for master 'm' + * + * Please read ARC HS Development IC Specification, section 17.2 for more + * information about apertures configuration. + * + * m master AXI_M_m_SLV0 AXI_M_m_SLV1 AXI_M_m_OFFSET0 AXI_M_m_OFFSET1 + * 0 HS (CBU) 0x11111111 0x63111111 0xFEDCBA98 0x0E543210 + * 1 HS (RTT) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 2 AXI Tunnel 0x88888888 0x88888888 0xFEDCBA98 0x76543210 + * 3 HDMI-VIDEO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 4 HDMI-ADUIO 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 5 USB-HOST 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 6 ETHERNET 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 7 SDIO 0x77777777 0x77999999 0xFEDCBA98 0x76DCBA98 + * 8 GPU 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 9 DMAC (port #1) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 10 DMAC (port #2) 0x77777777 0x77777777 0xFEDCBA98 0x76543210 + * 11 DVFS 0x00000000 0x60000000 0x00000000 0x00000000 + */ + +#define CREG_AXI_M_SLV0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m))) +#define CREG_AXI_M_SLV1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x04)) +#define CREG_AXI_M_OFT0(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x08)) +#define CREG_AXI_M_OFT1(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x0C)) +#define CREG_AXI_M_UPDT(m) ((void __iomem *)(CREG_BASE + 0x20 * (m) + 0x14)) + +#define CREG_AXI_M_HS_CORE_BOOT ((void __iomem *)(CREG_BASE + 0x010)) + +#define CREG_PAE ((void __iomem *)(CREG_BASE + 0x180)) +#define CREG_PAE_UPDT ((void __iomem *)(CREG_BASE + 0x194)) + +static void __init hsdk_init_memory_bridge(void) { + u32 reg; + + /* + * M_HS_CORE has one unique register - BOOT. + * We need to clean boot mirror (BOOT[1:0]) bits in them to avoid first + * aperture to be masked by 'boot mirror'. 
+ */ + reg = readl(CREG_AXI_M_HS_CORE_BOOT) & (~0x3); + writel(reg, CREG_AXI_M_HS_CORE_BOOT); + writel(0x11111111, CREG_AXI_M_SLV0(M_HS_CORE)); + writel(0x63111111, CREG_AXI_M_SLV1(M_HS_CORE)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_CORE)); + writel(0x0E543210, CREG_AXI_M_OFT1(M_HS_CORE)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_CORE)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HS_RTT)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HS_RTT)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HS_RTT)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HS_RTT)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HS_RTT)); + + writel(0x88888888, CREG_AXI_M_SLV0(M_AXI_TUN)); + writel(0x88888888, CREG_AXI_M_SLV1(M_AXI_TUN)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_AXI_TUN)); + writel(0x76543210, CREG_AXI_M_OFT1(M_AXI_TUN)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_AXI_TUN)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_VIDEO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_VIDEO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_VIDEO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_VIDEO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_VIDEO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_HDMI_AUDIO)); + writel(0x77777777, CREG_AXI_M_SLV1(M_HDMI_AUDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_HDMI_AUDIO)); + writel(0x76543210, CREG_AXI_M_OFT1(M_HDMI_AUDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_HDMI_AUDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_USB_HOST)); + writel(0x77999999, CREG_AXI_M_SLV1(M_USB_HOST)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_USB_HOST)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_USB_HOST)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_USB_HOST)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_ETHERNET)); + writel(0x77999999, CREG_AXI_M_SLV1(M_ETHERNET)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_ETHERNET)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_ETHERNET)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_ETHERNET)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_SDIO)); + writel(0x77999999, CREG_AXI_M_SLV1(M_SDIO)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_SDIO)); + writel(0x76DCBA98, CREG_AXI_M_OFT1(M_SDIO)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_SDIO)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_GPU)); + writel(0x77777777, CREG_AXI_M_SLV1(M_GPU)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_GPU)); + writel(0x76543210, CREG_AXI_M_OFT1(M_GPU)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_GPU)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_0)); + writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_0)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_0)); + writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_0)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_0)); + + writel(0x77777777, CREG_AXI_M_SLV0(M_DMAC_1)); + writel(0x77777777, CREG_AXI_M_SLV1(M_DMAC_1)); + writel(0xFEDCBA98, CREG_AXI_M_OFT0(M_DMAC_1)); + writel(0x76543210, CREG_AXI_M_OFT1(M_DMAC_1)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DMAC_1)); + + writel(0x00000000, CREG_AXI_M_SLV0(M_DVFS)); + writel(0x60000000, CREG_AXI_M_SLV1(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT0(M_DVFS)); + writel(0x00000000, CREG_AXI_M_OFT1(M_DVFS)); + writel(UPDATE_VAL, CREG_AXI_M_UPDT(M_DVFS)); + /* * PAE remapping for DMA clients does not work due to an RTL bug, so * CREG_PAE register must be programmed to all zeroes, otherwise it * will cause problems with DMA to/from peripherals even if PAE40 is * not used. 
*/ + writel(0x00000000, CREG_PAE); + writel(UPDATE_VAL, CREG_PAE_UPDT); +} - /* Default is 1, which means "PAE offset = 4GByte" */ - writel_relaxed(0, (void __iomem *) CREG_PAE); - - /* Really apply settings made above */ - writel(1, (void __iomem *) CREG_PAE_UPDATE); +static void __init hsdk_init_early(void) +{ + hsdk_init_memory_bridge(); /* * Switch SDIO external ciu clock divider from default div-by-8 to diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8869742a85df..2bf1ce39a96d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -4,6 +4,7 @@ config ARM default y select ARCH_32BIT_OFF_T select ARCH_CLOCKSOURCE_DATA + select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE @@ -30,6 +31,7 @@ config ARM select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_IPC_PARSE_VERSION + select BINFMT_FLAT_ARGVP_ENVP_ON_STACK select BUILDTIME_EXTABLE_SORT if MMU select CLONE_BACKWARDS select CPU_PM if SUSPEND || CPU_IDLE @@ -73,6 +75,7 @@ config ARM select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU select HAVE_EXIT_THREAD + select HAVE_FAST_GUP if ARM_LPAE select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL @@ -1175,6 +1178,14 @@ config ARM_ERRATA_825619 DMB NSHST or DMB ISHST instruction followed by a mix of Cacheable and Device/Strongly-Ordered loads and stores might cause deadlock +config ARM_ERRATA_857271 + bool "ARM errata: A12: CPU might deadlock under some very rare internal conditions" + depends on CPU_V7 + help + This option enables the workaround for the 857271 Cortex-A12 + (all revs) erratum. Under very rare timing conditions, the CPU might + hang. The workaround is expected to have a < 1% performance impact. + config ARM_ERRATA_852421 bool "ARM errata: A17: DMB ST might fail to create order between stores" depends on CPU_V7 @@ -1196,6 +1207,16 @@ config ARM_ERRATA_852423 config option from the A12 erratum due to the way errata are checked for and handled. +config ARM_ERRATA_857272 + bool "ARM errata: A17: CPU might deadlock under some very rare internal conditions" + depends on CPU_V7 + help + This option enables the workaround for the 857272 Cortex-A17 erratum. + This erratum is not known to be fixed in any A17 revision. + This is identical to Cortex-A12 erratum 857271. It is a separate + config option from the A12 erratum due to the way errata are checked + for and handled. + endmenu source "arch/arm/common/Kconfig" @@ -1232,6 +1253,18 @@ config PCI_HOST_ITE8152 default y select DMABOUNCE +config ARM_ERRATA_814220 + bool "ARM errata: Cache maintenance by set/way operations can execute out of order" + depends on CPU_V7 + help + The v7 ARM states that all cache and branch predictor maintenance + operations that do not specify an address execute, relative to + each other, in program order. + However, because of this erratum, an L2 set/way cache maintenance + operation can overtake an L1 set/way cache maintenance operation. + This ERRATA only affected the Cortex-A7 and present in r0p2, r0p3, + r0p4, r0p5. + endmenu menu "Kernel Features" @@ -1263,7 +1296,7 @@ config SMP uniprocessor machines. On a uniprocessor machine, the kernel will run faster if you say N here. 
- See also <file:Documentation/x86/i386/IO-APIC.txt>, + See also <file:Documentation/x86/i386/IO-APIC.rst>, <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at <http://tldp.org/HOWTO/SMP-HOWTO.html>. @@ -1590,16 +1623,9 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool ARCH_SPARSEMEM_ENABLE -config ARCH_SELECT_MEMORY_MODEL - def_bool ARCH_SPARSEMEM_ENABLE - config HAVE_ARCH_PFN_VALID def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM -config HAVE_GENERIC_GUP - def_bool y - depends on ARM_LPAE - config HIGHMEM bool "High Memory Support" depends on MMU @@ -2010,7 +2036,7 @@ config CRASH_DUMP kdump/kexec. The crash dump kernel must be compiled to a memory address not used by the main kernel - For more details see Documentation/kdump/kdump.txt + For more details see Documentation/kdump/kdump.rst config AUTO_ZRELADDR bool "Auto calculation of the decompressed kernel image address" diff --git a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi index 59753470cd34..267d0c178e55 100644 --- a/arch/arm/boot/dts/armada-xp-98dx3236.dtsi +++ b/arch/arm/boot/dts/armada-xp-98dx3236.dtsi @@ -336,3 +336,11 @@ status = "disabled"; }; +&uart0 { + compatible = "marvell,armada-38x-uart"; +}; + +&uart1 { + compatible = "marvell,armada-38x-uart"; +}; + diff --git a/arch/arm/boot/dts/gemini-dlink-dir-685.dts b/arch/arm/boot/dts/gemini-dlink-dir-685.dts index cfbfbc91a1e1..3613f05f8a80 100644 --- a/arch/arm/boot/dts/gemini-dlink-dir-685.dts +++ b/arch/arm/boot/dts/gemini-dlink-dir-685.dts @@ -20,7 +20,7 @@ }; chosen { - bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait"; + bootargs = "console=ttyS0,19200n8 root=/dev/sda1 rw rootwait consoleblank=300"; stdout-path = "uart0:19200n8"; }; diff --git a/arch/arm/boot/dts/gemini-dlink-dns-313.dts b/arch/arm/boot/dts/gemini-dlink-dns-313.dts index b12504e10f0b..360642a02a48 100644 --- a/arch/arm/boot/dts/gemini-dlink-dns-313.dts +++ b/arch/arm/boot/dts/gemini-dlink-dns-313.dts @@ -11,7 +11,7 @@ / { model = "D-Link DNS-313 1-Bay Network Storage Enclosure"; - compatible = "dlink,dir-313", "cortina,gemini"; + compatible = "dlink,dns-313", "cortina,gemini"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index bbf010c73336..a7f6d1d58e20 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -358,7 +358,7 @@ pwm1: pwm@2080000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02080000 0x4000>; - interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM1>, <&clks IMX6UL_CLK_PWM1>; clock-names = "ipg", "per"; @@ -369,7 +369,7 @@ pwm2: pwm@2084000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02084000 0x4000>; - interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM2>, <&clks IMX6UL_CLK_PWM2>; clock-names = "ipg", "per"; @@ -380,7 +380,7 @@ pwm3: pwm@2088000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x02088000 0x4000>; - interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_PWM3>, <&clks IMX6UL_CLK_PWM3>; clock-names = "ipg", "per"; @@ -391,7 +391,7 @@ pwm4: pwm@208c000 { compatible = "fsl,imx6ul-pwm", "fsl,imx27-pwm"; reg = <0x0208c000 0x4000>; - interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>; clocks = 
<&clks IMX6UL_CLK_PWM4>, <&clks IMX6UL_CLK_PWM4>; clock-names = "ipg", "per"; diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi index d6b711011cba..e20483714be5 100644 --- a/arch/arm/boot/dts/imx7ulp.dtsi +++ b/arch/arm/boot/dts/imx7ulp.dtsi @@ -100,6 +100,29 @@ reg = <0x40000000 0x800000>; ranges; + crypto: crypto@40240000 { + compatible = "fsl,sec-v4.0"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x40240000 0x10000>; + ranges = <0 0x40240000 0x10000>; + clocks = <&pcc2 IMX7ULP_CLK_CAAM>, + <&scg1 IMX7ULP_CLK_NIC1_BUS_DIV>; + clock-names = "aclk", "ipg"; + + sec_jr0: jr0@1000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x1000 0x1000>; + interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>; + }; + + sec_jr1: jr1@2000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x2000 0x1000>; + interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>; + }; + }; + lpuart4: serial@402d0000 { compatible = "fsl,imx7ulp-lpuart"; reg = <0x402d0000 0x1000>; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 7ef442462ea4..40c11b6b217a 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -248,8 +248,8 @@ <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>, - <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 173 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>, @@ -264,7 +264,6 @@ clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; operating-points-v2 = <&gpu_opp_table>; - switch-delay = <0xffff>; }; }; }; /* end of / */ diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index 800cd65fc50a..ec67f49116d9 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -163,23 +163,23 @@ opp-255000000 { opp-hz = /bits/ 64 <255000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-364300000 { opp-hz = /bits/ 64 <364300000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-425000000 { opp-hz = /bits/ 64 <425000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-510000000 { opp-hz = /bits/ 64 <510000000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; }; opp-637500000 { opp-hz = /bits/ 64 <637500000>; - opp-microvolt = <1150000>; + opp-microvolt = <1100000>; turbo-mode; }; }; @@ -229,7 +229,6 @@ clocks = <&clkc CLKID_CLK81>, <&clkc CLKID_MALI>; clock-names = "bus", "core"; operating-points-v2 = <&gpu_opp_table>; - switch-delay = <0xffff>; }; }; }; /* end of / */ diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 1252522392c7..1d8bfed7830c 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -424,6 +424,7 @@ &usb_host1 { status = "okay"; + snps,need-phy-for-wake; }; &usb_otg { @@ -432,6 +433,7 @@ assigned-clocks = <&cru SCLK_USBPHY480M_SRC>; assigned-clock-parents = <&usbphy0>; dr_mode = "host"; + snps,need-phy-for-wake; }; &vopb { diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 13e561737ca8..746e1fce777e 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -539,16 +539,14 @@ static void bL_switcher_trace_trigger_cpu(void *__always_unused info) int bL_switcher_trace_trigger(void) { - int ret; - preempt_disable(); bL_switcher_trace_trigger_cpu(NULL); - ret 
= smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); + smp_call_function(bL_switcher_trace_trigger_cpu, NULL, true); preempt_enable(); - return ret; + return 0; } EXPORT_SYMBOL_GPL(bL_switcher_trace_trigger); diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig index c95c54284da2..9b959afaaa12 100644 --- a/arch/arm/configs/exynos_defconfig +++ b/arch/arm/configs/exynos_defconfig @@ -9,6 +9,7 @@ CONFIG_MODULE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y CONFIG_ARCH_EXYNOS=y CONFIG_ARCH_EXYNOS3=y +CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND=y CONFIG_SMP=y CONFIG_BIG_LITTLE=y CONFIG_NR_CPUS=8 diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c index 48a89537b828..a8e9b534c8da 100644 --- a/arch/arm/crypto/chacha-neon-glue.c +++ b/arch/arm/crypto/chacha-neon-glue.c @@ -63,7 +63,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, } static int chacha_neon_stream_xor(struct skcipher_request *req, - struct chacha_ctx *ctx, u8 *iv) + const struct chacha_ctx *ctx, const u8 *iv) { struct skcipher_walk walk; u32 state[16]; diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c index 232eeab1ec37..8775aa42bbbe 100644 --- a/arch/arm/crypto/sha512-glue.c +++ b/arch/arm/crypto/sha512-glue.c @@ -34,7 +34,7 @@ int sha512_arm_update(struct shash_desc *desc, const u8 *data, (sha512_block_fn *)sha512_block_data_order); } -int sha512_arm_final(struct shash_desc *desc, u8 *out) +static int sha512_arm_final(struct shash_desc *desc, u8 *out) { sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_block_data_order); diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index a8f149ab45b8..6b2dc15b6dff 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += early_ioremap.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h +generic-y += flat.h generic-y += irq_regs.h generic-y += kdebug.h generic-y += local.h diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 4b66ecd6be99..99175812d903 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -4,6 +4,7 @@ #include <asm/barrier.h> #include <asm/errno.h> +#include <asm/hwcap.h> #include <linux/clocksource.h> #include <linux/init.h> #include <linux/types.h> @@ -124,6 +125,15 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } +static inline void arch_timer_set_evtstrm_feature(void) +{ + elf_hwcap |= HWCAP_EVTSTRM; +} + +static inline bool arch_timer_have_evtstrm_feature(void) +{ + return elf_hwcap & HWCAP_EVTSTRM; +} #endif #endif diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 50c3ac5f0809..75bb2c543e59 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -246,15 +246,15 @@ ATOMIC_OPS(xor, ^=, eor) #ifndef CONFIG_GENERIC_ATOMIC64 typedef struct { - long long counter; + s64 counter; } atomic64_t; #define ATOMIC64_INIT(i) { (i) } #ifdef CONFIG_ARM_LPAE -static inline long long atomic64_read(const atomic64_t *v) +static inline s64 atomic64_read(const atomic64_t *v) { - long long result; + s64 result; __asm__ __volatile__("@ atomic64_read\n" " ldrd %0, %H0, [%1]" @@ -265,7 +265,7 @@ static inline long long atomic64_read(const atomic64_t *v) return result; } -static inline void atomic64_set(atomic64_t *v, long long i) +static inline void atomic64_set(atomic64_t *v, s64 i) { __asm__ __volatile__("@ atomic64_set\n" " strd %2, %H2, [%1]" @@ -274,9 
+274,9 @@ static inline void atomic64_set(atomic64_t *v, long long i) ); } #else -static inline long long atomic64_read(const atomic64_t *v) +static inline s64 atomic64_read(const atomic64_t *v) { - long long result; + s64 result; __asm__ __volatile__("@ atomic64_read\n" " ldrexd %0, %H0, [%1]" @@ -287,9 +287,9 @@ static inline long long atomic64_read(const atomic64_t *v) return result; } -static inline void atomic64_set(atomic64_t *v, long long i) +static inline void atomic64_set(atomic64_t *v, s64 i) { - long long tmp; + s64 tmp; prefetchw(&v->counter); __asm__ __volatile__("@ atomic64_set\n" @@ -304,9 +304,9 @@ static inline void atomic64_set(atomic64_t *v, long long i) #endif #define ATOMIC64_OP(op, op1, op2) \ -static inline void atomic64_##op(long long i, atomic64_t *v) \ +static inline void atomic64_##op(s64 i, atomic64_t *v) \ { \ - long long result; \ + s64 result; \ unsigned long tmp; \ \ prefetchw(&v->counter); \ @@ -323,10 +323,10 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ } \ #define ATOMIC64_OP_RETURN(op, op1, op2) \ -static inline long long \ -atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ +static inline s64 \ +atomic64_##op##_return_relaxed(s64 i, atomic64_t *v) \ { \ - long long result; \ + s64 result; \ unsigned long tmp; \ \ prefetchw(&v->counter); \ @@ -346,10 +346,10 @@ atomic64_##op##_return_relaxed(long long i, atomic64_t *v) \ } #define ATOMIC64_FETCH_OP(op, op1, op2) \ -static inline long long \ -atomic64_fetch_##op##_relaxed(long long i, atomic64_t *v) \ +static inline s64 \ +atomic64_fetch_##op##_relaxed(s64 i, atomic64_t *v) \ { \ - long long result, val; \ + s64 result, val; \ unsigned long tmp; \ \ prefetchw(&v->counter); \ @@ -403,10 +403,9 @@ ATOMIC64_OPS(xor, eor, eor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -static inline long long -atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new) +static inline s64 atomic64_cmpxchg_relaxed(atomic64_t *ptr, s64 old, s64 new) { - long long oldval; + s64 oldval; unsigned long res; prefetchw(&ptr->counter); @@ -427,9 +426,9 @@ atomic64_cmpxchg_relaxed(atomic64_t *ptr, long long old, long long new) } #define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed -static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new) +static inline s64 atomic64_xchg_relaxed(atomic64_t *ptr, s64 new) { - long long result; + s64 result; unsigned long tmp; prefetchw(&ptr->counter); @@ -447,9 +446,9 @@ static inline long long atomic64_xchg_relaxed(atomic64_t *ptr, long long new) } #define atomic64_xchg_relaxed atomic64_xchg_relaxed -static inline long long atomic64_dec_if_positive(atomic64_t *v) +static inline s64 atomic64_dec_if_positive(atomic64_t *v) { - long long result; + s64 result; unsigned long tmp; smp_mb(); @@ -475,10 +474,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) } #define atomic64_dec_if_positive atomic64_dec_if_positive -static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, - long long u) +static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long long oldval, newval; + s64 oldval, newval; unsigned long tmp; smp_mb(); diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index 36c951dd23b8..deef4d0cb3b5 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -85,7 +85,7 @@ void hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, extern asmlinkage void c_backtrace(unsigned long fp, int pmode); struct mm_struct; -extern void 
show_pte(struct mm_struct *mm, unsigned long addr); +void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr); extern void __show_regs(struct pt_regs *); #endif diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index d6667b8cfca5..7114b9aa46b8 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -476,4 +476,11 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size) void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, void *kaddr, unsigned long len); + +#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND +void check_cpu_icache_size(int cpuid); +#else +static inline void check_cpu_icache_size(int cpuid) { } +#endif + #endif diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 03ba90ffc0f8..7e0486ad1318 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -89,13 +89,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) } #endif -/* The ARM override for dma_max_pfn() */ -static inline unsigned long dma_max_pfn(struct device *dev) -{ - return dma_to_pfn(dev, *dev->dma_mask); -} -#define dma_max_pfn(dev) dma_max_pfn(dev) - /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h deleted file mode 100644 index f0c75ddeea23..000000000000 --- a/arch/arm/include/asm/flat.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * arch/arm/include/asm/flat.h -- uClinux flat-format executables - */ - -#ifndef __ARM_FLAT_H__ -#define __ARM_FLAT_H__ - -#include <linux/uaccess.h> - -#define flat_argvp_envp_on_stack() 1 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) - -static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) -{ -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - return copy_from_user(addr, rp, 4) ? -EFAULT : 0; -#else - return get_user(*addr, rp); -#endif -} - -static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) -{ -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - return copy_to_user(rp, &addr, 4) ? 
-EFAULT : 0; -#else - return put_user(addr, rp); -#endif -} - -#define flat_get_relocate_addr(rel) (rel) -#define flat_set_persistent(relval, p) 0 - -#endif /* __ARM_FLAT_H__ */ diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 6b7644a383f6..40002416efec 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -271,6 +271,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK; } +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu) +{ + return false; +} + +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu, + bool flag) +{ +} + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { *vcpu_cpsr(vcpu) |= PSR_E_BIT; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index f80418ddeb60..8a37c8e89777 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -15,7 +15,6 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> #include <asm/fpstate.h> -#include <asm/smp_plat.h> #include <kvm/arm_arch_timer.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -147,11 +146,10 @@ struct kvm_host_data { typedef struct kvm_host_data kvm_host_data_t; -static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt, - int cpu) +static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ - cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu); + cpu_ctxt->cp15[c0_MPIDR] = read_cpuid_mpidr(); } struct vcpu_reset_state { @@ -362,7 +360,11 @@ static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_vhe_guest_enter(void) {} static inline void kvm_arm_vhe_guest_exit(void) {} -static inline bool kvm_arm_harden_branch_predictor(void) +#define KVM_BP_HARDEN_UNKNOWN -1 +#define KVM_BP_HARDEN_WA_NEEDED 0 +#define KVM_BP_HARDEN_NOT_REQUIRED 1 + +static inline int kvm_arm_harden_branch_predictor(void) { switch(read_cpuid_part()) { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR @@ -370,10 +372,12 @@ static inline bool kvm_arm_harden_branch_predictor(void) case ARM_CPU_PART_CORTEX_A12: case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_CORTEX_A17: - return true; + return KVM_BP_HARDEN_WA_NEEDED; #endif + case ARM_CPU_PART_CORTEX_A7: + return KVM_BP_HARDEN_NOT_REQUIRED; default: - return false; + return KVM_BP_HARDEN_UNKNOWN; } } diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 71ac1c8d101c..40e9034db601 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -82,13 +82,14 @@ #define VFP_FPEXC __ACCESS_VFP(FPEXC) /* AArch64 compatibility macros, only for the timer so far */ -#define read_sysreg_el0(r) read_sysreg(r##_el0) -#define write_sysreg_el0(v, r) write_sysreg(v, r##_el0) +#define read_sysreg_el0(r) read_sysreg(r##_EL0) +#define write_sysreg_el0(v, r) write_sysreg(v, r##_EL0) + +#define SYS_CNTP_CTL_EL0 CNTP_CTL +#define SYS_CNTP_CVAL_EL0 CNTP_CVAL +#define SYS_CNTV_CTL_EL0 CNTV_CTL +#define SYS_CNTV_CVAL_EL0 CNTV_CVAL -#define cntp_ctl_el0 CNTP_CTL -#define cntp_cval_el0 CNTP_CVAL -#define cntv_ctl_el0 CNTV_CTL -#define cntv_cval_el0 CNTV_CVAL #define cntvoff_el2 CNTVOFF #define cnthctl_el2 CNTHCTL diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index c038cff6fdd3..a2a68b751971 100644 --- a/arch/arm/include/asm/pgalloc.h +++ 
b/arch/arm/include/asm/pgalloc.h @@ -54,8 +54,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) - static inline void clean_pte_table(pte_t *pte) { clean_dcache_area(pte + PTE_HWTABLE_PTRS, PTE_HWTABLE_SIZE); @@ -77,54 +75,41 @@ static inline void clean_pte_table(pte_t *pte) * | h/w pt 1 | * +------------+ */ + +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +#define __HAVE_ARCH_PTE_ALLOC_ONE +#include <asm-generic/pgalloc.h> + static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm) { - pte_t *pte; + pte_t *pte = __pte_alloc_one_kernel(mm); - pte = (pte_t *)__get_free_page(PGALLOC_GFP); if (pte) clean_pte_table(pte); return pte; } +#ifdef CONFIG_HIGHPTE +#define PGTABLE_HIGHMEM __GFP_HIGHMEM +#else +#define PGTABLE_HIGHMEM 0 +#endif + static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { struct page *pte; -#ifdef CONFIG_HIGHPTE - pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); -#else - pte = alloc_pages(PGALLOC_GFP, 0); -#endif + pte = __pte_alloc_one(mm, GFP_PGTABLE_USER | PGTABLE_HIGHMEM); if (!pte) return NULL; if (!PageHighMem(pte)) clean_pte_table(page_address(pte)); - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } return pte; } -/* - * Free one PTE table. - */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - if (pte) - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte, pmdval_t prot) { diff --git a/arch/arm/include/asm/ptdump.h b/arch/arm/include/asm/ptdump.h index 3ebf9718288d..0c2d3d0d4cc6 100644 --- a/arch/arm/include/asm/ptdump.h +++ b/arch/arm/include/asm/ptdump.h @@ -21,13 +21,10 @@ struct ptdump_info { void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info); #ifdef CONFIG_ARM_PTDUMP_DEBUGFS -int ptdump_debugfs_register(struct ptdump_info *info, const char *name); +void ptdump_debugfs_register(struct ptdump_info *info, const char *name); #else -static inline int ptdump_debugfs_register(struct ptdump_info *info, - const char *name) -{ - return 0; -} +static inline void ptdump_debugfs_register(struct ptdump_info *info, + const char *name) { } #endif /* CONFIG_ARM_PTDUMP_DEBUGFS */ void ptdump_check_wx(void); diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index a00288d75ee6..172b08ff3760 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -30,7 +30,7 @@ static inline int __in_irqentry_text(unsigned long ptr) extern void __init early_trap_init(void *); extern void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame); -extern void ptrace_break(struct task_struct *tsk, struct pt_regs *regs); +extern void ptrace_break(struct pt_regs *regs); extern void *vectors_page; diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 9fb00973c608..3676e82cf95c 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -37,6 +37,7 @@ #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 /* * Unimplemented (or alternatively implemented) syscalls diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 4602464ebdfb..a4217c1a5d01 100644 --- 
a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -214,6 +214,18 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_FW | ((r) & 0xffff)) #define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1) + /* Higher values mean better protection. */ +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) + /* Higher values mean better protection. */ +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 diff --git a/arch/arm/kernel/efi.c b/arch/arm/kernel/efi.c index ed005870671a..e57dbcc89123 100644 --- a/arch/arm/kernel/efi.c +++ b/arch/arm/kernel/efi.c @@ -8,8 +8,7 @@ #include <asm/mach/map.h> #include <asm/mmu_context.h> -static int __init set_permissions(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) +static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) { efi_memory_desc_t *md = data; pte_t pte = *ptep; diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index afcb4d3b14dc..324352787aea 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -198,15 +198,15 @@ void ptrace_disable(struct task_struct *child) /* * Handle hitting a breakpoint. */ -void ptrace_break(struct task_struct *tsk, struct pt_regs *regs) +void ptrace_break(struct pt_regs *regs) { force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *)instruction_pointer(regs), tsk); + (void __user *)instruction_pointer(regs)); } static int break_trap(struct pt_regs *regs, unsigned int instr) { - ptrace_break(current, regs); + ptrace_break(regs); return 0; } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 3ca71d679aec..09f6fdd41974 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -247,7 +247,7 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) return regs->ARM_r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -280,7 +280,7 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) return regs->ARM_r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index a137608cd197..aab8ba40ce38 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -372,6 +372,7 @@ static void smp_store_cpu_info(unsigned int cpuid) cpu_info->cpuid = read_cpuid_id(); store_cpu_topology(cpuid); + check_cpu_icache_size(cpuid); } /* diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 60e375ce1ab2..d17cb1e6d679 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -169,7 +169,7 @@ static void update_cpu_capacity(unsigned int cpu) topology_set_cpu_scale(cpu, cpu_capacity(cpu) / middle_capacity); pr_info("CPU%u: update cpu_capacity %lu\n", - cpu, topology_get_cpu_scale(NULL, cpu)); + cpu, topology_get_cpu_scale(cpu)); } #else diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 7e2f1cba84e5..c053abd1fb53 100644 --- a/arch/arm/kernel/traps.c +++ 
b/arch/arm/kernel/traps.c @@ -369,7 +369,7 @@ void arm_notify_die(const char *str, struct pt_regs *regs, current->thread.error_code = err; current->thread.trap_no = trap; - force_sig_fault(signo, si_code, addr, current); + force_sig_fault(signo, si_code, addr); } else { die(str, regs, err); } @@ -603,7 +603,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) case NR(breakpoint): /* SWI BREAK_POINT */ regs->ARM_pc -= thumb_mode(regs) ? 2 : 4; - ptrace_break(current, regs); + ptrace_break(regs); return regs->ARM_r0; /* @@ -722,10 +722,11 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs) #ifdef CONFIG_DEBUG_USER if (user_debug & UDBG_BADABORT) { + pr_err("8<--- cut here ---\n"); pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n", task_pid_nr(current), current->comm, code, instr); dump_instr(KERN_ERR, regs); - show_pte(current->mm, addr); + show_pte(KERN_ERR, current->mm, addr); } #endif diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index 51a892702e27..a273ab25c668 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -61,6 +61,9 @@ static struct regulator_consumer_supply da830_evm_usb_supplies[] = { static struct regulator_init_data da830_evm_usb_vbus_data = { .consumer_supplies = da830_evm_usb_supplies, .num_consumer_supplies = ARRAY_SIZE(da830_evm_usb_supplies), + .constraints = { + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, }; static struct fixed_voltage_config da830_evm_usb_vbus = { @@ -88,7 +91,7 @@ static struct gpiod_lookup_table da830_evm_usb_oc_gpio_lookup = { static struct gpiod_lookup_table da830_evm_usb_vbus_gpio_lookup = { .dev_id = "reg-fixed-voltage.0", .table = { - GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, "vbus", 0), + GPIO_LOOKUP("davinci_gpio", ON_BD_USB_DRV, NULL, 0), { } }, }; diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index db177a6a7e48..5390a8630cf0 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -306,6 +306,9 @@ static struct regulator_consumer_supply hawk_usb_supplies[] = { static struct regulator_init_data hawk_usb_vbus_data = { .consumer_supplies = hawk_usb_supplies, .num_consumer_supplies = ARRAY_SIZE(hawk_usb_supplies), + .constraints = { + .valid_ops_mask = REGULATOR_CHANGE_STATUS, + }, }; static struct fixed_voltage_config hawk_usb_vbus = { diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index 0af2bf6f9933..43899fa56674 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -11,6 +11,7 @@ * in the MontaVista 2.4 kernel (and the Amstrad changes therein) */ #include <linux/gpio/consumer.h> +#include <linux/gpio/machine.h> #include <linux/gpio/driver.h> #include <linux/interrupt.h> #include <linux/irq.h> @@ -99,7 +100,8 @@ void __init ams_delta_init_fiq(struct gpio_chip *chip, } for (i = 0; i < ARRAY_SIZE(irq_data); i++) { - gpiod = gpiochip_request_own_desc(chip, i, pin_name[i], 0); + gpiod = gpiochip_request_own_desc(chip, i, pin_name[i], + GPIO_ACTIVE_HIGH, GPIOD_IN); if (IS_ERR(gpiod)) { pr_err("%s: failed to get GPIO pin %d (%ld)\n", __func__, i, PTR_ERR(gpiod)); diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 36498ea1b2f3..e47a6fbcfd6e 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -10,6 +10,7 @@ */ #include 
<linux/gpio/driver.h> #include <linux/gpio/machine.h> +#include <linux/gpio/consumer.h> #include <linux/gpio.h> #include <linux/kernel.h> #include <linux/init.h> @@ -606,12 +607,12 @@ static void __init modem_assign_irq(struct gpio_chip *chip) struct gpio_desc *gpiod; gpiod = gpiochip_request_own_desc(chip, AMS_DELTA_GPIO_PIN_MODEM_IRQ, - "modem_irq", 0); + "modem_irq", GPIO_ACTIVE_HIGH, + GPIOD_IN); if (IS_ERR(gpiod)) { pr_err("%s: modem IRQ GPIO request failed (%ld)\n", __func__, PTR_ERR(gpiod)); } else { - gpiod_direction_input(gpiod); ams_delta_modem_ports[0].irq = gpiod_to_irq(gpiod); } } diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index 406fd2a9a88f..bd5be82101f3 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -987,84 +987,44 @@ static int debug_clock_show(struct seq_file *s, void *unused) DEFINE_SHOW_ATTRIBUTE(debug_clock); -static int clk_debugfs_register_one(struct clk *c) +static void clk_debugfs_register_one(struct clk *c) { - int err; struct dentry *d; struct clk *pa = c->parent; d = debugfs_create_dir(c->name, pa ? pa->dent : clk_debugfs_root); - if (!d) - return -ENOMEM; c->dent = d; - d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount); - if (!d) { - err = -ENOMEM; - goto err_out; - } - d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate); - if (!d) { - err = -ENOMEM; - goto err_out; - } - d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags); - if (!d) { - err = -ENOMEM; - goto err_out; - } - return 0; - -err_out: - debugfs_remove_recursive(c->dent); - return err; + debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount); + debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate); + debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags); } -static int clk_debugfs_register(struct clk *c) +static void clk_debugfs_register(struct clk *c) { - int err; struct clk *pa = c->parent; - if (pa && !pa->dent) { - err = clk_debugfs_register(pa); - if (err) - return err; - } + if (pa && !pa->dent) + clk_debugfs_register(pa); - if (!c->dent) { - err = clk_debugfs_register_one(c); - if (err) - return err; - } - return 0; + if (!c->dent) + clk_debugfs_register_one(c); } static int __init clk_debugfs_init(void) { struct clk *c; struct dentry *d; - int err; d = debugfs_create_dir("clock", NULL); - if (!d) - return -ENOMEM; clk_debugfs_root = d; - list_for_each_entry(c, &clocks, node) { - err = clk_debugfs_register(c); - if (err) - goto err_out; - } + list_for_each_entry(c, &clocks, node) + clk_debugfs_register(c); - d = debugfs_create_file("summary", S_IRUGO, - d, NULL, &debug_clock_fops); - if (!d) - return -ENOMEM; + debugfs_create_file("summary", S_IRUGO, d, NULL, &debug_clock_fops); return 0; -err_out: - debugfs_remove_recursive(clk_debugfs_root); - return err; } late_initcall(clk_debugfs_init); diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index 998075d3ef86..d068958d6f8a 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -539,11 +539,8 @@ static void omap_pm_init_debugfs(void) struct dentry *d; d = debugfs_create_dir("pm_debug", NULL); - if (!d) - return; - - (void) debugfs_create_file("omap_pm", S_IWUSR | S_IRUGO, - d, NULL, &omap_pm_debug_fops); + debugfs_create_file("omap_pm", S_IWUSR | S_IRUGO, d, NULL, + &omap_pm_debug_fops); } #endif /* CONFIG_DEBUG_FS */ diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c index fe6ec9b580b9..fceb1e525d26 100644 --- a/arch/arm/mach-omap2/pm-debug.c +++ b/arch/arm/mach-omap2/pm-debug.c @@ 
-190,9 +190,8 @@ static int __init pwrdms_setup(struct powerdomain *pwrdm, void *dir) return 0; d = debugfs_create_dir(pwrdm->name, (struct dentry *)dir); - if (d) - (void) debugfs_create_file("suspend", S_IRUGO|S_IWUSR, d, - (void *)pwrdm, &pwrdm_suspend_fops); + debugfs_create_file("suspend", S_IRUGO|S_IWUSR, d, pwrdm, + &pwrdm_suspend_fops); return 0; } @@ -230,16 +229,14 @@ static int __init pm_dbg_init(void) return 0; d = debugfs_create_dir("pm_debug", NULL); - if (!d) - return -EINVAL; - (void) debugfs_create_file("count", 0444, d, NULL, &pm_dbg_counters_fops); - (void) debugfs_create_file("time", 0444, d, NULL, &pm_dbg_timers_fops); + debugfs_create_file("count", 0444, d, NULL, &pm_dbg_counters_fops); + debugfs_create_file("time", 0444, d, NULL, &pm_dbg_timers_fops); pwrdm_for_each(pwrdms_setup, (void *)d); - (void) debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUSR, d, - &enable_off_mode, &pm_dbg_option_fops); + debugfs_create_file("enable_off_mode", S_IRUGO | S_IWUSR, d, + &enable_off_mode, &pm_dbg_option_fops); pm_dbg_init_done = 1; return 0; diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index fd4a3bf27993..1b442b128569 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -430,7 +430,7 @@ static void omap3_prm_reconfigure_io_chain(void) * registers, and omap3xxx_prm_reconfigure_io_chain() must be called. * No return value. */ -static void __init omap3xxx_prm_enable_io_wakeup(void) +static void omap3xxx_prm_enable_io_wakeup(void) { if (prm_features & PRM_HAS_IO_WAKEUP) omap2_prm_set_mod_reg_bits(OMAP3430_EN_IO_MASK, WKUP_MOD, diff --git a/arch/arm/mach-pxa/am200epd.c b/arch/arm/mach-pxa/am200epd.c index 50e18ed37fa6..cac0bb09db14 100644 --- a/arch/arm/mach-pxa/am200epd.c +++ b/arch/arm/mach-pxa/am200epd.c @@ -347,8 +347,17 @@ int __init am200_init(void) { int ret; - /* before anything else, we request notification for any fb * creation events */ + /* + * Before anything else, we request notification for any fb + * creation events. + * + * FIXME: This is terrible and needs to be nuked. The notifier is used + * to get at the fb base address from the boot splash fb driver, which + * is then passed to metronomefb, instead of metronomefb or this board + * support file here figuring this out on their own. + * + * See also the #ifdef in fbmem.c. + */ fb_register_client(&am200_fb_notif); pxa2xx_mfp_config(ARRAY_AND_SIZE(am200_pin_config)); diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c index 379424d72ae7..8ec6a4f5eb05 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410.c @@ -15,6 +15,7 @@ #include <linux/io.h> #include <linux/init.h> #include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <linux/leds.h> #include <linux/delay.h> #include <linux/mmc/host.h> @@ -398,7 +399,6 @@ static struct pca953x_platform_data crag6410_pca_data = { /* VDDARM is controlled by DVS1 connected to GPK(0) */ static struct wm831x_buckv_pdata vddarm_pdata = { .dvs_control_src = 1, - .dvs_gpio = S3C64XX_GPK(0), }; static struct regulator_consumer_supply vddarm_consumers[] = { @@ -596,6 +596,24 @@ static struct wm831x_pdata crag_pmic_pdata = { .touch = &touch_pdata, }; +/* + * VDDARM eventually ends up as a regulator hanging on the MFD cell device + * "wm831x-buckv.1" spawned from drivers/mfd/wm831x-core.c.
+ * + * From the note on the platform data we can see that this is clearly DVS1 + * and assigned as dcdc1 resource to the MFD core which sets .id of the cell + * spawning the DVS1 platform device to 1, then the cell platform device + * name is calculated from 10*instance + id resulting in the device name + * "wm831x-buckv.11" + */ +static struct gpiod_lookup_table crag_pmic_gpiod_table = { + .dev_id = "wm831x-buckv.11", + .table = { + GPIO_LOOKUP("GPIOK", 0, "dvs", GPIO_ACTIVE_HIGH), + { }, + }, +}; + static struct i2c_board_info i2c_devs0[] = { { I2C_BOARD_INFO("24c08", 0x50), }, { I2C_BOARD_INFO("tca6408", 0x20), @@ -836,6 +854,7 @@ static void __init crag6410_machine_init(void) s3c_fb_set_platdata(&crag6410_lcd_pdata); dwc2_hsotg_set_platdata(&crag6410_hsotg_pdata); + gpiod_add_lookup_table(&crag_pmic_gpiod_table); i2c_register_board_info(0, i2c_devs0, ARRAY_SIZE(i2c_devs0)); i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1)); diff --git a/arch/arm/mach-stm32/Kconfig b/arch/arm/mach-stm32/Kconfig index 36e6c68c0b57..05d6b5aada80 100644 --- a/arch/arm/mach-stm32/Kconfig +++ b/arch/arm/mach-stm32/Kconfig @@ -44,6 +44,7 @@ if ARCH_MULTI_V7 config MACH_STM32MP157 bool "STMicroelectronics STM32MP157" + select ARM_ERRATA_814220 default y endif # ARMv7-A diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index b169e580bf82..cc798115aa9b 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -780,6 +780,14 @@ config CPU_ICACHE_DISABLE Say Y here to disable the processor instruction cache. Unless you have a reason not to or are unsure, say N. +config CPU_ICACHE_MISMATCH_WORKAROUND + bool "Workaround for I-Cache line size mismatch between CPU cores" + depends on SMP && CPU_V7 + help + Some big.LITTLE systems have I-Cache line size mismatch between + LITTLE and big cores. Say Y here to enable a workaround for + proper I-Cache support on such systems. If unsure, say N. 
+ config CPU_DCACHE_DISABLE bool "Disable D-Cache (C-bit)" depends on (CPU_CP15 && !SMP) || CPU_V7M diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 6067fa4de22b..8cdb78642e93 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -945,7 +945,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) goto fixup; if (ai_usermode & UM_SIGNAL) { - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); } else { /* * We're about to disable the alignment trap and return to diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 8c83b4586883..0ee8fc4b4672 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -16,6 +16,14 @@ #include "proc-macros.S" +#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND +.globl icache_size + .data + .align 2 +icache_size: + .long 64 + .text +#endif /* * The secondary kernel init calls v7_flush_dcache_all before it enables * the L1; however, the L1 comes out of reset in an undefined state, so @@ -160,6 +168,9 @@ loop2: skip: add r10, r10, #2 @ increment cache number cmp r3, r10 +#ifdef CONFIG_ARM_ERRATA_814220 + dsb +#endif bgt flush_levels finished: mov r10, #0 @ switch back to cache level 0 @@ -281,7 +292,12 @@ ENTRY(v7_coherent_user_range) cmp r12, r1 blo 1b dsb ishst +#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND + ldr r3, =icache_size + ldr r2, [r3, #0] +#else icache_line_size r2, r3 +#endif sub r3, r2, #1 bic r12, r0, r3 2: diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 1aea01ba1262..52b82559d99b 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -35,18 +35,7 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size, unsigned long attrs) { - void *ret; - - /* - * Try generic allocator first if we are advertised that - * consistency is not required. - */ - - if (attrs & DMA_ATTR_NON_CONSISTENT) - return dma_direct_alloc_pages(dev, size, dma_handle, gfp, - attrs); - - ret = dma_alloc_from_global_coherent(size, dma_handle); + void *ret = dma_alloc_from_global_coherent(size, dma_handle); /* * dma_alloc_from_global_coherent() may fail because: @@ -66,16 +55,9 @@ static void arm_nommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { - if (attrs & DMA_ATTR_NON_CONSISTENT) { - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); - } else { - int ret = dma_release_from_global_coherent(get_order(size), - cpu_addr); - - WARN_ON_ONCE(ret == 0); - } + int ret = dma_release_from_global_coherent(get_order(size), cpu_addr); - return; + WARN_ON_ONCE(ret == 0); } static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 439bb6a59a04..4789c60a86e3 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -216,25 +216,7 @@ EXPORT_SYMBOL(arm_coherent_dma_ops); static int __dma_supported(struct device *dev, u64 mask, bool warn) { - unsigned long max_dma_pfn; - - /* - * If the mask allows for more memory than we can address, - * and we actually have that much memory, then we must - * indicate that DMA to this device is not supported. 
- */ - if (sizeof(mask) != sizeof(dma_addr_t) && - mask > (dma_addr_t)~0 && - dma_to_pfn(dev, ~0) < max_pfn - 1) { - if (warn) { - dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", - mask); - dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); - } - return 0; - } - - max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + unsigned long max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); /* * Translate the device's DMA mask to a PFN limit. This @@ -493,8 +475,7 @@ void __init dma_contiguous_remap(void) } } -static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, - void *data) +static int __dma_update_pte(pte_t *pte, unsigned long addr, void *data) { struct page *page = virt_to_page(addr); pgprot_t prot = *(pgprot_t *)data; diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index 006d27ee4fc6..7d6291f23251 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -446,7 +446,7 @@ void ptdump_check_wx(void) static int ptdump_init(void) { ptdump_initialize(); - return ptdump_debugfs_register(&kernel_ptdump_info, - "kernel_page_tables"); + ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); + return 0; } __initcall(ptdump_init); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 0048eadd0681..0e417233dad7 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -53,17 +53,16 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr) * This is useful to dump out the page tables associated with * 'addr' in mm 'mm'. */ -void show_pte(struct mm_struct *mm, unsigned long addr) +void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; if (!mm) mm = &init_mm; - pr_alert("pgd = %p\n", mm->pgd); + printk("%spgd = %p\n", lvl, mm->pgd); pgd = pgd_offset(mm, addr); - pr_alert("[%08lx] *pgd=%08llx", - addr, (long long)pgd_val(*pgd)); + printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd)); do { pud_t *pud; @@ -118,7 +117,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) pr_cont("\n"); } #else /* CONFIG_MMU */ -void show_pte(struct mm_struct *mm, unsigned long addr) +void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) { } #endif /* CONFIG_MMU */ @@ -139,11 +138,12 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, * No handler, we'll have to terminate things with extreme prejudice. */ bust_spinlocks(1); + pr_alert("8<--- cut here ---\n"); pr_alert("Unable to handle kernel %s at virtual address %08lx\n", (addr < PAGE_SIZE) ? 
"NULL pointer dereference" : "paging request", addr); - show_pte(mm, addr); + show_pte(KERN_ALERT, mm, addr); die("Oops", regs, fsr); bust_spinlocks(0); do_exit(SIGKILL); @@ -154,19 +154,21 @@ __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, * User mode accesses just cause a SIGSEGV */ static void -__do_user_fault(struct task_struct *tsk, unsigned long addr, - unsigned int fsr, unsigned int sig, int code, - struct pt_regs *regs) +__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig, + int code, struct pt_regs *regs) { + struct task_struct *tsk = current; + if (addr > TASK_SIZE) harden_branch_predictor(); #ifdef CONFIG_DEBUG_USER if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { - printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", + pr_err("8<--- cut here ---\n"); + pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", tsk->comm, sig, addr, fsr); - show_pte(tsk->mm, addr); + show_pte(KERN_ERR, tsk->mm, addr); show_regs(regs); } #endif @@ -180,7 +182,7 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, tsk->thread.address = addr; tsk->thread.error_code = fsr; tsk->thread.trap_no = 14; - force_sig_fault(sig, code, (void __user *)addr, tsk); + force_sig_fault(sig, code, (void __user *)addr); } void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -193,7 +195,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * have no context to handle this fault with. */ if (user_mode(regs)) - __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs); + __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs); else __do_kernel_fault(mm, addr, fsr, regs); } @@ -389,7 +391,7 @@ retry: SEGV_ACCERR : SEGV_MAPERR; } - __do_user_fault(tsk, addr, fsr, sig, code, regs); + __do_user_fault(addr, fsr, sig, code, regs); return 0; no_context: @@ -553,9 +555,10 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) return; + pr_alert("8<--- cut here ---\n"); pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n", inf->name, fsr, addr); - show_pte(current->mm, addr); + show_pte(KERN_ALERT, current->mm, addr); arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr, fsr, 0); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 749a5a6f6143..4920a206dce9 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -239,6 +239,22 @@ static void __init arm_initrd_init(void) #endif } +#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND +void check_cpu_icache_size(int cpuid) +{ + u32 size, ctr; + + asm("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr)); + + size = 1 << ((ctr & 0xf) + 2); + if (cpuid != 0 && icache_size != size) + pr_info("CPU%u: detected I-Cache line size mismatch, workaround enabled\n", + cpuid); + if (icache_size > size) + icache_size = size; +} +#endif + void __init arm_memblock_init(const struct machine_desc *mdesc) { /* Register the kernel text, kernel data and initrd with memblock. 
*/ @@ -447,12 +463,6 @@ static void __init free_highpages(void) */ void __init mem_init(void) { -#ifdef CONFIG_HAVE_TCM - /* These pointers are filled in on TCM detection */ - extern u32 dtcm_end; - extern u32 itcm_end; -#endif - set_max_mapnr(pfn_to_page(max_pfn) - mem_map); /* this will put all unused low memory onto the freelists */ diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 6b045c6653ea..941356d95a67 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -8,6 +8,8 @@ /* the upper-most page table pointer */ extern pmd_t *top_pmd; +extern int icache_size; + /* * 0xffff8000 to 0xffffffff is reserved for any ARM architecture * specific hacks for copying pages efficiently, while 0xffff4000 diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 1aa2586fa597..d9a0038774a6 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -729,7 +729,7 @@ static void __init *early_alloc(unsigned long sz) static void *__init late_alloc(unsigned long sz) { - void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz)); + void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz)); if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) BUG(); diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c index 0f5faf30d9bf..d546efad7e97 100644 --- a/arch/arm/mm/pageattr.c +++ b/arch/arm/mm/pageattr.c @@ -14,8 +14,7 @@ struct page_change_data { pgprot_t clear_mask; }; -static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) +static int change_page_range(pte_t *ptep, unsigned long addr, void *data) { struct page_change_data *cdata = data; pte_t pte = *ptep; diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 83741c31757d..c4e8006a1a8c 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -389,6 +389,11 @@ __ca12_errata: orr r10, r10, #1 << 24 @ set bit #24 mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register #endif +#ifdef CONFIG_ARM_ERRATA_857271 + mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register + orr r10, r10, #3 << 10 @ set bits #10 and #11 + mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif b __errata_finish __ca17_errata: @@ -404,6 +409,11 @@ __ca17_errata: orrle r10, r10, #1 << 12 @ set bit #12 mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register #endif +#ifdef CONFIG_ARM_ERRATA_857272 + mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register + orr r10, r10, #3 << 10 @ set bits #10 and #11 + mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif b __errata_finish __v7_pj4b_setup: diff --git a/arch/arm/mm/ptdump_debugfs.c b/arch/arm/mm/ptdump_debugfs.c index be8d87be4b93..598b636615a2 100644 --- a/arch/arm/mm/ptdump_debugfs.c +++ b/arch/arm/mm/ptdump_debugfs.c @@ -24,11 +24,7 @@ static const struct file_operations ptdump_fops = { .release = single_release, }; -int ptdump_debugfs_register(struct ptdump_info *info, const char *name) +void ptdump_debugfs_register(struct ptdump_info *info, const char *name) { - struct dentry *pe; - - pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); - return pe ? 
0 : -ENOMEM; - + debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); } diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index adff54c312bf..97dc386e3cb8 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -733,7 +733,8 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[], /* ALU operation */ emit_alu_r(rd[1], rs, true, false, op, ctx); - emit_a32_mov_i(rd[0], 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(rd[0], 0, ctx); } arm_bpf_put_reg64(dst, rd, ctx); @@ -755,8 +756,9 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], struct jit_ctx *ctx) { if (!is64) { emit_a32_mov_r(dst_lo, src_lo, ctx); - /* Zero out high 4 bytes */ - emit_a32_mov_i(dst_hi, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + /* Zero out high 4 bytes */ + emit_a32_mov_i(dst_hi, 0, ctx); } else if (__LINUX_ARM_ARCH__ < 6 && ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ @@ -1057,17 +1059,20 @@ static inline void emit_ldx_r(const s8 dst[], const s8 src, case BPF_B: /* Load a Byte */ emit(ARM_LDRB_I(rd[1], rm, off), ctx); - emit_a32_mov_i(rd[0], 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_H: /* Load a HalfWord */ emit(ARM_LDRH_I(rd[1], rm, off), ctx); - emit_a32_mov_i(rd[0], 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_W: /* Load a Word */ emit(ARM_LDR_I(rd[1], rm, off), ctx); - emit_a32_mov_i(rd[0], 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(rd[0], 0, ctx); break; case BPF_DW: /* Load a Double Word */ @@ -1356,6 +1361,11 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU64 | BPF_MOV | BPF_X: switch (BPF_SRC(code)) { case BPF_X: + if (imm == 1) { + /* Special mov32 for zext */ + emit_a32_mov_i(dst_hi, 0, ctx); + break; + } emit_a32_mov_r64(is64, dst, src, ctx); break; case BPF_K: @@ -1435,7 +1445,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) } emit_udivmod(rd_lo, rd_lo, rt, ctx, BPF_OP(code)); arm_bpf_put_reg32(dst_lo, rd_lo, ctx); - emit_a32_mov_i(dst_hi, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(dst_hi, 0, ctx); break; case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_X: @@ -1450,7 +1461,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) return -EINVAL; if (imm) emit_a32_alu_i(dst_lo, imm, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = dst << imm */ case BPF_ALU64 | BPF_LSH | BPF_K: @@ -1485,7 +1497,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = ~dst */ case BPF_ALU | BPF_NEG: emit_a32_alu_i(dst_lo, 0, ctx, BPF_OP(code)); - emit_a32_mov_i(dst_hi, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_a32_mov_i(dst_hi, 0, ctx); break; /* dst = ~dst (64 bit) */ case BPF_ALU64 | BPF_NEG: @@ -1541,11 +1554,13 @@ emit_bswap_uxt: #else /* ARMv6+ */ emit(ARM_UXTH(rd[1], rd[1]), ctx); #endif - emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 32: /* zero-extend 32 bits into 64 bits */ - emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(ARM_EOR_R(rd[0], rd[0], rd[0]), ctx); break; case 64: /* nop */ @@ -1835,6 +1850,11 @@ void bpf_jit_compile(struct bpf_prog *prog) /* Nothing to do here. 
We support Internal BPF. */ } +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_prog *tmp, *orig_prog = prog; diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index aaf479a9e92d..6da7dc4d79cc 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -447,3 +447,5 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 1f5ec9741e6d..ca85df247775 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -12,8 +12,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - -z max-page-size=4096 -z common-page-size=4096 \ - -nostdlib -shared $(ldflags-y) \ + -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \ $(call ld-option, --hash-style=sysv) \ $(call ld-option, --build-id) \ -T diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 697ea0510729..a36ff61321ce 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,6 +26,7 @@ config ARM64 select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS + select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX @@ -107,6 +108,8 @@ config ARM64 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY + select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT) select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI @@ -140,6 +143,7 @@ config ARM64 select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER @@ -160,6 +164,7 @@ config ARM64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_GENERIC_VDSO select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING @@ -260,10 +265,8 @@ config GENERIC_CALIBRATE_DELAY def_bool y config ZONE_DMA32 - def_bool y - -config HAVE_GENERIC_GUP - def_bool y + bool "Support DMA32 zone" if EXPERT + default y config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y @@ -933,7 +936,6 @@ config PARAVIRT config PARAVIRT_TIME_ACCOUNTING bool "Paravirtual steal time accounting" select PARAVIRT - default n help Select this option to enable fine granularity task steal time accounting. Time spent executing other tasks in parallel with @@ -994,7 +996,7 @@ config CRASH_DUMP reserved region and then later executed after a crash by kdump/kexec. - For more details see Documentation/kdump/kdump.txt + For more details see Documentation/kdump/kdump.rst config XEN_DOM0 def_bool y @@ -1418,12 +1420,27 @@ config ARM64_SVE KVM in the same kernel image. config ARM64_MODULE_PLTS - bool + bool "Use PLTs to allow module memory to spill over into vmalloc area" + depends on MODULES select HAVE_MOD_ARCH_SPECIFIC + help + Allocate PLTs when loading modules so that jumps and calls whose + targets are too far away for their relative offsets to be encoded + in the instructions themselves can be bounced via veneers in the + module's PLT. This allows modules to be allocated in the generic + vmalloc area after the dedicated module memory area has been + exhausted. 
+ + When running with address space randomization (KASLR), the module + region itself may be too far away for ordinary relative jumps and + calls, and so in that case, module PLTs are required and cannot be + disabled. + + Specific errata workaround(s) might also force module PLTs to be + enabled (ARM64_ERRATUM_843419). config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" - depends on BROKEN # 1556553607-46531-1-git-send-email-julien.thierry@arm.com select CONFIG_ARM_GIC_V3 help Adds support for mimicking Non-Maskable Interrupts through the use of @@ -1436,6 +1453,17 @@ config ARM64_PSEUDO_NMI If unsure, say N +if ARM64_PSEUDO_NMI +config ARM64_DEBUG_PRIORITY_MASKING + bool "Debug interrupt priority masking" + help + This adds runtime checks to functions enabling/disabling + interrupts when using priority masking. The additional checks verify + the validity of ICC_PMR_EL1 when calling concerned functions. + + If unsure, say N +endif + config RELOCATABLE bool help diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index e9d2e578cbe6..bb1f1dbb34e8 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -30,8 +30,6 @@ LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -KBUILD_DEFCONFIG := defconfig - # Check for binutils support for specific extensions lseinstr := $(call as-instr,.arch_extension lse,-DCONFIG_AS_LSE=1) @@ -49,10 +47,26 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable) endif endif -KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) +ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y) + CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%) + + ifeq ($(CONFIG_CC_IS_CLANG), y) + $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built) + else ifeq ($(CROSS_COMPILE_COMPAT),) + $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built) + else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),) + $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT) + else + export CROSS_COMPILE_COMPAT + export CONFIG_COMPAT_VDSO := y + compat_vdso := -DCONFIG_COMPAT_VDSO=1 + endif +endif + +KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) $(compat_vdso) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-disable-warning, psabi) -KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) +KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) @@ -164,6 +178,9 @@ ifeq ($(KBUILD_EXTMOD),) prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h + $(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \ + $(build)=arch/arm64/kernel/vdso32 \ + include/generated/vdso32-offsets.h) endif define archhelp diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 470dcfd9de91..4b0f674df849 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -539,6 +539,14 @@ interrupts = <16 4>; }; + ocram-ecc@ff8cc000 { + compatible = "altr,socfpga-s10-ocram-ecc", + "altr,socfpga-a10-ocram-ecc"; + reg = <0xff8cc000 0x100>; + altr,ecc-parent = <&ocram>; + interrupts = <1 4>; + }; + usb0-ecc@ff8c4000 { compatible = "altr,socfpga-s10-usb-ecc", "altr,socfpga-usb-ecc"; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts 
b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index 84f9f5902e74..66e4ffb4e929 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -56,6 +56,17 @@ clock-frequency = <25000000>; }; }; + + eccmgr { + sdmmca-ecc@ff8c8c00 { + compatible = "altr,socfpga-s10-sdmmc-ecc", + "altr,socfpga-sdmmc-ecc"; + reg = <0xff8c8c00 0x100>; + altr,ecc-parent = <&mmc>; + interrupts = <14 4>, + <15 4>; + }; + }; }; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index b04581249f0b..22a1c74dddf3 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -28,7 +28,7 @@ enable-method = "psci"; clocks = <&clockgen 1 0>; next-level-cache = <&l2>; - cpu-idle-states = <&CPU_PH20>; + cpu-idle-states = <&CPU_PW20>; }; cpu1: cpu@1 { @@ -38,7 +38,7 @@ enable-method = "psci"; clocks = <&clockgen 1 0>; next-level-cache = <&l2>; - cpu-idle-states = <&CPU_PH20>; + cpu-idle-states = <&CPU_PW20>; }; l2: l2-cache { @@ -53,13 +53,13 @@ */ entry-method = "arm,psci"; - CPU_PH20: cpu-ph20 { - compatible = "arm,idle-state"; - idle-state-name = "PH20"; - arm,psci-suspend-param = <0x00010000>; - entry-latency-us = <1000>; - exit-latency-us = <1000>; - min-residency-us = <3000>; + CPU_PW20: cpu-pw20 { + compatible = "arm,idle-state"; + idle-state-name = "PW20"; + arm,psci-suspend-param = <0x0>; + entry-latency-us = <2000>; + exit-latency-us = <2000>; + min-residency-us = <6000>; }; }; @@ -431,6 +431,12 @@ compatible = "fsl,enetc"; reg = <0x000100 0 0 0 0>; }; + ethernet@0,4 { + compatible = "fsl,enetc-ptp"; + reg = <0x000400 0 0 0 0>; + clocks = <&clockgen 4 0>; + little-endian; + }; }; }; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index 661137ffa319..dacd8cf03a7f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -609,6 +609,14 @@ <GIC_SPI 209 IRQ_TYPE_LEVEL_HIGH>; }; + ptp-timer@8b95000 { + compatible = "fsl,dpaa2-ptp"; + reg = <0x0 0x8b95000 0x0 0x100>; + clocks = <&clockgen 4 0>; + little-endian; + fsl,extts-fifo; + }; + cluster1_core0_watchdog: wdt@c000000 { compatible = "arm,sp805-wdt", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index d7e78dcd153d..3ace91945b72 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -321,6 +321,14 @@ }; }; + ptp-timer@8b95000 { + compatible = "fsl,dpaa2-ptp"; + reg = <0x0 0x8b95000 0x0 0x100>; + clocks = <&clockgen 4 1>; + little-endian; + fsl,extts-fifo; + }; + fsl_mc: fsl-mc@80c000000 { compatible = "fsl,qoriq-mc"; reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */ diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi index 125a8cc2c5b3..e6fdba39453c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi @@ -848,6 +848,14 @@ dma-coherent; }; + ptp-timer@8b95000 { + compatible = "fsl,dpaa2-ptp"; + reg = <0x0 0x8b95000 0x0 0x100>; + clocks = <&clockgen 4 1>; + little-endian; + fsl,extts-fifo; + }; + fsl_mc: fsl-mc@80c000000 { compatible = "fsl,qoriq-mc"; reg = <0x00000008 0x0c000000 0 0x40>, diff --git a/arch/arm64/boot/dts/freescale/imx8mn-pinfunc.h 
b/arch/arm64/boot/dts/freescale/imx8mn-pinfunc.h new file mode 100644 index 000000000000..faf1e69e742b --- /dev/null +++ b/arch/arm64/boot/dts/freescale/imx8mn-pinfunc.h @@ -0,0 +1,646 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Copyright 2018-2019 NXP + */ + +#ifndef __DTS_IMX8MN_PINFUNC_H +#define __DTS_IMX8MN_PINFUNC_H + +/* + * The pin function ID is a tuple of + * <mux_reg conf_reg input_reg mux_mode input_val> + */ + +#define MX8MN_IOMUXC_BOOT_MODE2_CCMSRCGPCMIX_BOOT_MODE2 0x020 0x25C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_BOOT_MODE2_I2C1_SCL 0x020 0x25C 0x55C 0x1 0x3 +#define MX8MN_IOMUXC_BOOT_MODE3_CCMSRCGPCMIX_BOOT_MODE3 0x024 0x260 0x000 0x0 0x0 +#define MX8MN_IOMUXC_BOOT_MODE3_I2C1_SDA 0x024 0x260 0x56C 0x1 0x3 +#define MX8MN_IOMUXC_GPIO1_IO00_GPIO1_IO0 0x028 0x290 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO00_CCMSRCGPCMIX_ENET_PHY_REF_CLK_ROOT 0x028 0x290 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO00_ANAMIX_REF_CLK_32K 0x028 0x290 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO00_CCMSRCGPCMIX_EXT_CLK1 0x028 0x290 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO01_GPIO1_IO1 0x02C 0x294 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO01_PWM1_OUT 0x02C 0x294 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO01_ANAMIX_REF_CLK_24M 0x02C 0x294 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO01_CCMSRCGPCMIX_EXT_CLK2 0x02C 0x294 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO02_GPIO1_IO2 0x030 0x298 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO02_WDOG1_WDOG_B 0x030 0x298 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO02_WDOG1_WDOG_ANY 0x030 0x298 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO03_GPIO1_IO3 0x034 0x29C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO03_USDHC1_VSELECT 0x034 0x29C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO03_SDMA1_EXT_EVENT0 0x034 0x29C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO03_ANAMIX_XTAL_OK 0x034 0x29C 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO04_GPIO1_IO4 0x038 0x2A0 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO04_USDHC2_VSELECT 0x038 0x2A0 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO04_SDMA1_EXT_EVENT1 0x038 0x2A0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO04_ANAMIX_XTAL_OK_LV 0x038 0x2A0 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO05_GPIO1_IO5 0x03C 0x2A4 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO05_M4_NMI 0x03C 0x2A4 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO05_CCMSRCGPCMIX_PMIC_READY 0x03C 0x2A4 0x4BC 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO05_CCMSRCGPCMIX_INT_BOOT 0x03C 0x2A4 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO06_GPIO1_IO6 0x040 0x2A8 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO06_ENET1_MDC 0x040 0x2A8 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO06_USDHC1_CD_B 0x040 0x2A8 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO06_CCMSRCGPCMIX_EXT_CLK3 0x040 0x2A8 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO07_GPIO1_IO7 0x044 0x2AC 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO07_ENET1_MDIO 0x044 0x2AC 0x4C0 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO07_USDHC1_WP 0x044 0x2AC 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO07_CCMSRCGPCMIX_EXT_CLK4 0x044 0x2AC 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO08_GPIO1_IO8 0x048 0x2B0 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO08_ENET1_1588_EVENT0_IN 0x048 0x2B0 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO08_PWM1_OUT 0x048 0x2B0 0x000 0x2 0x0 +#define MX8MN_IOMUXC_GPIO1_IO08_USDHC2_RESET_B 0x048 0x2B0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO08_CCMSRCGPCMIX_WAIT 0x048 0x2B0 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x04C 0x2B4 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO09_ENET1_1588_EVENT0_OUT 0x04C 0x2B4 0x000 0x1 0x0 
+#define MX8MN_IOMUXC_GPIO1_IO09_PWM2_OUT 0x04C 0x2B4 0x000 0x2 0x0 +#define MX8MN_IOMUXC_GPIO1_IO09_USDHC3_RESET_B 0x04C 0x2B4 0x000 0x4 0x0 +#define MX8MN_IOMUXC_GPIO1_IO09_SDMA2_EXT_EVENT0 0x04C 0x2B4 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO09_CCMSRCGPCMIX_STOP 0x04C 0x2B4 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO10_GPIO1_IO10 0x050 0x2B8 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO10_USB1_OTG_ID 0x050 0x2B8 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO10_PWM3_OUT 0x050 0x2B8 0x000 0x2 0x0 +#define MX8MN_IOMUXC_GPIO1_IO11_GPIO1_IO11 0x054 0x2BC 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO11_PWM2_OUT 0x054 0x2BC 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO11_USDHC3_VSELECT 0x054 0x2BC 0x000 0x4 0x0 +#define MX8MN_IOMUXC_GPIO1_IO11_CCMSRCGPCMIX_PMIC_READY 0x054 0x2BC 0x4BC 0x5 0x1 +#define MX8MN_IOMUXC_GPIO1_IO11_CCMSRCGPCMIX_OUT0 0x054 0x2BC 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO12_GPIO1_IO12 0x058 0x2C0 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO12_USB1_OTG_PWR 0x058 0x2C0 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO12_SDMA2_EXT_EVENT1 0x058 0x2C0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO12_CCMSRCGPCMIX_OUT1 0x058 0x2C0 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO13_GPIO1_IO13 0x05C 0x2C4 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO13_USB1_OTG_OC 0x05C 0x2C4 0x000 0x1 0x0 +#define MX8MN_IOMUXC_GPIO1_IO13_PWM2_OUT 0x05C 0x2C4 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO13_CCMSRCGPCMIX_OUT2 0x05C 0x2C4 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO14_GPIO1_IO14 0x060 0x2C8 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO14_USDHC3_CD_B 0x060 0x2C8 0x598 0x4 0x2 +#define MX8MN_IOMUXC_GPIO1_IO14_PWM3_OUT 0x060 0x2C8 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO14_CCMSRCGPCMIX_CLKO1 0x060 0x2C8 0x000 0x6 0x0 +#define MX8MN_IOMUXC_GPIO1_IO15_GPIO1_IO15 0x064 0x2CC 0x000 0x0 0x0 +#define MX8MN_IOMUXC_GPIO1_IO15_USDHC3_WP 0x064 0x2CC 0x5B8 0x4 0x2 +#define MX8MN_IOMUXC_GPIO1_IO15_PWM4_OUT 0x064 0x2CC 0x000 0x5 0x0 +#define MX8MN_IOMUXC_GPIO1_IO15_CCMSRCGPCMIX_CLKO2 0x064 0x2CC 0x000 0x6 0x0 +#define MX8MN_IOMUXC_ENET_MDC_ENET1_MDC 0x068 0x2D0 0x000 0x0 0x0 +#define MX8MN_IOMUXC_ENET_MDC_SAI6_TX_DATA0 0x068 0x2D0 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_MDC_PDM_BIT_STREAM3 0x068 0x2D0 0x540 0x3 0x1 +#define MX8MN_IOMUXC_ENET_MDC_SPDIF1_OUT 0x068 0x2D0 0x000 0x4 0x0 +#define MX8MN_IOMUXC_ENET_MDC_GPIO1_IO16 0x068 0x2D0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_MDC_USDHC3_STROBE 0x068 0x2D0 0x59C 0x6 0x1 +#define MX8MN_IOMUXC_ENET_MDIO_ENET1_MDIO 0x06C 0x2D4 0x4C0 0x0 0x1 +#define MX8MN_IOMUXC_ENET_MDIO_SAI6_TX_SYNC 0x06C 0x2D4 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_MDIO_PDM_BIT_STREAM2 0x06C 0x2D4 0x53C 0x3 0x1 +#define MX8MN_IOMUXC_ENET_MDIO_SPDIF1_IN 0x06C 0x2D4 0x5CC 0x4 0x1 +#define MX8MN_IOMUXC_ENET_MDIO_GPIO1_IO17 0x06C 0x2D4 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_MDIO_USDHC3_DATA5 0x06C 0x2D4 0x550 0x6 0x1 +#define MX8MN_IOMUXC_ENET_TD3_ENET1_RGMII_TD3 0x070 0x2D8 0x000 0x0 0x0 +#define MX8MN_IOMUXC_ENET_TD3_SAI6_TX_BCLK 0x070 0x2D8 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_TD3_PDM_BIT_STREAM1 0x070 0x2D8 0x538 0x3 0x1 +#define MX8MN_IOMUXC_ENET_TD3_SPDIF1_EXT_CLK 0x070 0x2D8 0x568 0x4 0x1 +#define MX8MN_IOMUXC_ENET_TD3_GPIO1_IO18 0x070 0x2D8 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_TD3_USDHC3_DATA6 0x070 0x2D8 0x584 0x6 0x1 +#define MX8MN_IOMUXC_ENET_TD2_ENET1_RGMII_TD2 0x074 0x2DC 0x000 0x0 0x0 +#define MX8MN_IOMUXC_ENET_TD2_ENET1_TX_CLK 0x074 0x2DC 0x5A4 0x1 0x0 +#define MX8MN_IOMUXC_ENET_TD2_CCMSRCGPCMIX_ENET_REF_CLK_ROOT 0x074 0x2DC 0x5A4 0x1 0x0 +#define 
MX8MN_IOMUXC_ENET_TD2_SAI6_RX_DATA0 0x074 0x2DC 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_TD2_PDM_BIT_STREAM3 0x074 0x2DC 0x540 0x3 0x2 +#define MX8MN_IOMUXC_ENET_TD2_GPIO1_IO19 0x074 0x2DC 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_TD2_USDHC3_DATA7 0x074 0x2DC 0x54C 0x6 0x1 +#define MX8MN_IOMUXC_ENET_TD1_ENET1_RGMII_TD1 0x078 0x2E0 0x000 0x0 0x0 +#define MX8MN_IOMUXC_ENET_TD1_SAI6_RX_SYNC 0x078 0x2E0 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_TD1_PDM_BIT_STREAM2 0x078 0x2E0 0x53C 0x3 0x2 +#define MX8MN_IOMUXC_ENET_TD1_GPIO1_IO20 0x078 0x2E0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_TD1_USDHC3_CD_B 0x078 0x2E0 0x598 0x6 0x3 +#define MX8MN_IOMUXC_ENET_TD0_ENET1_RGMII_TD0 0x07C 0x2E4 0x000 0x0 0x0 +#define MX8MN_IOMUXC_ENET_TD0_SAI6_RX_BCLK 0x07C 0x2E4 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_TD0_PDM_BIT_STREAM1 0x07C 0x2E4 0x538 0x3 0x2 +#define MX8MN_IOMUXC_ENET_TD0_GPIO1_IO21 0x07C 0x2E4 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_TD0_USDHC3_WP 0x07C 0x2E4 0x5B8 0x6 0x3 +#define MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x080 0x2E8 0x000 0x0 0x0 +#define MX8MN_IOMUXC_ENET_TX_CTL_SAI6_MCLK 0x080 0x2E8 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_TX_CTL_GPIO1_IO22 0x080 0x2E8 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_TX_CTL_USDHC3_DATA0 0x080 0x2E8 0x5B4 0x6 0x1 +#define MX8MN_IOMUXC_ENET_TXC_ENET1_RGMII_TXC 0x084 0x2EC 0x000 0x0 0x0 +#define MX8MN_IOMUXC_ENET_TXC_ENET1_TX_ER 0x084 0x2EC 0x000 0x1 0x0 +#define MX8MN_IOMUXC_ENET_TXC_SAI7_TX_DATA0 0x084 0x2EC 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_TXC_GPIO1_IO23 0x084 0x2EC 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_TXC_USDHC3_DATA1 0x084 0x2EC 0x5B0 0x6 0x1 +#define MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x088 0x2F0 0x574 0x0 0x0 +#define MX8MN_IOMUXC_ENET_RX_CTL_SAI7_TX_SYNC 0x088 0x2F0 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_RX_CTL_PDM_BIT_STREAM3 0x088 0x2F0 0x540 0x3 0x3 +#define MX8MN_IOMUXC_ENET_RX_CTL_GPIO1_IO24 0x088 0x2F0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_RX_CTL_USDHC3_DATA2 0x088 0x2F0 0x5E4 0x6 0x1 +#define MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC 0x08C 0x2F4 0x000 0x0 0x0 +#define MX8MN_IOMUXC_ENET_RXC_ENET1_RX_ER 0x08C 0x2F4 0x5C8 0x1 0x0 +#define MX8MN_IOMUXC_ENET_RXC_SAI7_TX_BCLK 0x08C 0x2F4 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_RXC_PDM_BIT_STREAM2 0x08C 0x2F4 0x53C 0x3 0x3 +#define MX8MN_IOMUXC_ENET_RXC_GPIO1_IO25 0x08C 0x2F4 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_RXC_USDHC3_DATA3 0x08C 0x2F4 0x5E0 0x6 0x1 +#define MX8MN_IOMUXC_ENET_RD0_ENET1_RGMII_RD0 0x090 0x2F8 0x57C 0x0 0x0 +#define MX8MN_IOMUXC_ENET_RD0_SAI7_RX_DATA0 0x090 0x2F8 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_RD0_PDM_BIT_STREAM1 0x090 0x2F8 0x538 0x3 0x3 +#define MX8MN_IOMUXC_ENET_RD0_GPIO1_IO26 0x090 0x2F8 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_RD0_USDHC3_DATA4 0x090 0x2F8 0x558 0x6 0x1 +#define MX8MN_IOMUXC_ENET_RD1_ENET1_RGMII_RD1 0x094 0x2FC 0x554 0x0 0x0 +#define MX8MN_IOMUXC_ENET_RD1_SAI7_RX_SYNC 0x094 0x2FC 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_RD1_PDM_BIT_STREAM0 0x094 0x2FC 0x534 0x3 0x1 +#define MX8MN_IOMUXC_ENET_RD1_GPIO1_IO27 0x094 0x2FC 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_RD1_USDHC3_RESET_B 0x094 0x2FC 0x000 0x6 0x0 +#define MX8MN_IOMUXC_ENET_RD2_ENET1_RGMII_RD2 0x098 0x300 0x000 0x0 0x0 +#define MX8MN_IOMUXC_ENET_RD2_SAI7_RX_BCLK 0x098 0x300 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_RD2_PDM_CLK 0x098 0x300 0x000 0x3 0x0 +#define MX8MN_IOMUXC_ENET_RD2_GPIO1_IO28 0x098 0x300 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_RD2_USDHC3_CLK 0x098 0x300 0x5A0 0x6 0x1 +#define MX8MN_IOMUXC_ENET_RD3_ENET1_RGMII_RD3 0x09C 0x304 0x000 0x0 0x0 +#define 
MX8MN_IOMUXC_ENET_RD3_SAI7_MCLK 0x09C 0x304 0x000 0x2 0x0 +#define MX8MN_IOMUXC_ENET_RD3_SPDIF1_IN 0x09C 0x304 0x5CC 0x3 0x5 +#define MX8MN_IOMUXC_ENET_RD3_GPIO1_IO29 0x09C 0x304 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ENET_RD3_USDHC3_CMD 0x09C 0x304 0x5DC 0x6 0x1 +#define MX8MN_IOMUXC_SD1_CLK_USDHC1_CLK 0x0A0 0x308 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_CLK_ENET1_MDC 0x0A0 0x308 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SD1_CLK_UART1_DCE_TX 0x0A0 0x308 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_CLK_UART1_DTE_RX 0x0A0 0x308 0x4F4 0x4 0x4 +#define MX8MN_IOMUXC_SD1_CLK_GPIO2_IO0 0x0A0 0x308 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_CMD_USDHC1_CMD 0x0A4 0x30C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_CMD_ENET1_MDIO 0x0A4 0x30C 0x4C0 0x1 0x3 +#define MX8MN_IOMUXC_SD1_CMD_UART1_DCE_RX 0x0A4 0x30C 0x4F4 0x4 0x5 +#define MX8MN_IOMUXC_SD1_CMD_UART1_DTE_TX 0x0A4 0x30C 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_CMD_GPIO2_IO1 0x0A4 0x30C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_DATA0_USDHC1_DATA0 0x0A8 0x310 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_DATA0_ENET1_RGMII_TD1 0x0A8 0x310 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SD1_DATA0_UART1_DCE_RTS_B 0x0A8 0x310 0x4F0 0x4 0x4 +#define MX8MN_IOMUXC_SD1_DATA0_UART1_DTE_CTS_B 0x0A8 0x310 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_DATA0_GPIO2_IO2 0x0A8 0x310 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_DATA1_USDHC1_DATA1 0x0AC 0x314 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_DATA1_ENET1_RGMII_TD0 0x0AC 0x314 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SD1_DATA1_UART1_DCE_CTS_B 0x0AC 0x314 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_DATA1_UART1_DTE_RTS_B 0x0AC 0x314 0x4F0 0x4 0x5 +#define MX8MN_IOMUXC_SD1_DATA1_GPIO2_IO3 0x0AC 0x314 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_DATA2_USDHC1_DATA2 0x0B0 0x318 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_DATA2_ENET1_RGMII_RD0 0x0B0 0x318 0x57C 0x1 0x1 +#define MX8MN_IOMUXC_SD1_DATA2_UART2_DCE_TX 0x0B0 0x318 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_DATA2_UART2_DTE_RX 0x0B0 0x318 0x4FC 0x4 0x4 +#define MX8MN_IOMUXC_SD1_DATA2_GPIO2_IO4 0x0B0 0x318 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_DATA3_USDHC1_DATA3 0x0B4 0x31C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_DATA3_ENET1_RGMII_RD1 0x0B4 0x31C 0x554 0x1 0x1 +#define MX8MN_IOMUXC_SD1_DATA3_UART2_DCE_RX 0x0B4 0x31C 0x4FC 0x4 0x5 +#define MX8MN_IOMUXC_SD1_DATA3_UART2_DTE_TX 0x0B4 0x31C 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_DATA3_GPIO2_IO5 0x0B4 0x31C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_DATA4_USDHC1_DATA4 0x0B8 0x320 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_DATA4_ENET1_RGMII_TX_CTL 0x0B8 0x320 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SD1_DATA4_I2C1_SCL 0x0B8 0x320 0x55C 0x3 0x1 +#define MX8MN_IOMUXC_SD1_DATA4_UART2_DCE_RTS_B 0x0B8 0x320 0x4F8 0x4 0x4 +#define MX8MN_IOMUXC_SD1_DATA4_UART2_DTE_CTS_B 0x0B8 0x320 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_DATA4_GPIO2_IO6 0x0B8 0x320 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_DATA5_USDHC1_DATA5 0x0BC 0x324 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_DATA5_ENET1_TX_ER 0x0BC 0x324 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SD1_DATA5_I2C1_SDA 0x0BC 0x324 0x56C 0x3 0x1 +#define MX8MN_IOMUXC_SD1_DATA5_UART2_DCE_CTS_B 0x0BC 0x324 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_DATA5_UART2_DTE_RTS_B 0x0BC 0x324 0x4F8 0x4 0x5 +#define MX8MN_IOMUXC_SD1_DATA5_GPIO2_IO7 0x0BC 0x324 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_DATA6_USDHC1_DATA6 0x0C0 0x328 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_DATA6_ENET1_RGMII_RX_CTL 0x0C0 0x328 0x574 0x1 0x1 +#define MX8MN_IOMUXC_SD1_DATA6_I2C2_SCL 0x0C0 0x328 0x5D0 0x3 0x1 +#define MX8MN_IOMUXC_SD1_DATA6_UART3_DCE_TX 0x0C0 0x328 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_DATA6_UART3_DTE_RX 
0x0C0 0x328 0x504 0x4 0x4 +#define MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8 0x0C0 0x328 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_DATA7_USDHC1_DATA7 0x0C4 0x32C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_DATA7_ENET1_RX_ER 0x0C4 0x32C 0x5C8 0x1 0x1 +#define MX8MN_IOMUXC_SD1_DATA7_I2C2_SDA 0x0C4 0x32C 0x560 0x3 0x1 +#define MX8MN_IOMUXC_SD1_DATA7_UART3_DCE_RX 0x0C4 0x32C 0x504 0x4 0x5 +#define MX8MN_IOMUXC_SD1_DATA7_UART3_DTE_TX 0x0C4 0x32C 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_DATA7_GPIO2_IO9 0x0C4 0x32C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_RESET_B_USDHC1_RESET_B 0x0C8 0x330 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_RESET_B_ENET1_TX_CLK 0x0C8 0x330 0x5A4 0x1 0x1 +#define MX8MN_IOMUXC_SD1_RESET_B_CCMSRCGPCMIX_ENET_REF_CLK_ROOT 0x0C8 0x330 0x5A4 0x1 0x0 +#define MX8MN_IOMUXC_SD1_RESET_B_I2C3_SCL 0x0C8 0x330 0x588 0x3 0x1 +#define MX8MN_IOMUXC_SD1_RESET_B_UART3_DCE_RTS_B 0x0C8 0x330 0x500 0x4 0x2 +#define MX8MN_IOMUXC_SD1_RESET_B_UART3_DTE_CTS_B 0x0C8 0x330 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_RESET_B_GPIO2_IO10 0x0C8 0x330 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD1_STROBE_USDHC1_STROBE 0x0CC 0x334 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD1_STROBE_I2C3_SDA 0x0CC 0x334 0x5BC 0x3 0x1 +#define MX8MN_IOMUXC_SD1_STROBE_UART3_DCE_CTS_B 0x0CC 0x334 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD1_STROBE_UART3_DTE_RTS_B 0x0CC 0x334 0x500 0x4 0x3 +#define MX8MN_IOMUXC_SD1_STROBE_GPIO2_IO11 0x0CC 0x334 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_CD_B_USDHC2_CD_B 0x0D0 0x338 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD2_CD_B_GPIO2_IO12 0x0D0 0x338 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_CD_B_CCMSRCGPCMIX_TESTER_ACK 0x0D0 0x338 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SD2_CLK_USDHC2_CLK 0x0D4 0x33C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD2_CLK_SAI5_RX_SYNC 0x0D4 0x33C 0x4E4 0x1 0x1 +#define MX8MN_IOMUXC_SD2_CLK_ECSPI2_SCLK 0x0D4 0x33C 0x580 0x2 0x1 +#define MX8MN_IOMUXC_SD2_CLK_UART4_DCE_RX 0x0D4 0x33C 0x50C 0x3 0x4 +#define MX8MN_IOMUXC_SD2_CLK_UART4_DTE_TX 0x0D4 0x33C 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SD2_CLK_SAI5_MCLK 0x0D4 0x33C 0x594 0x4 0x1 +#define MX8MN_IOMUXC_SD2_CLK_GPIO2_IO13 0x0D4 0x33C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_CLK_CCMSRCGPCMIX_OBSERVE0 0x0D4 0x33C 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SD2_CMD_USDHC2_CMD 0x0D8 0x340 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD2_CMD_SAI5_RX_BCLK 0x0D8 0x340 0x4D0 0x1 0x1 +#define MX8MN_IOMUXC_SD2_CMD_ECSPI2_MOSI 0x0D8 0x340 0x590 0x2 0x1 +#define MX8MN_IOMUXC_SD2_CMD_UART4_DCE_TX 0x0D8 0x340 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SD2_CMD_UART4_DTE_RX 0x0D8 0x340 0x50C 0x3 0x5 +#define MX8MN_IOMUXC_SD2_CMD_PDM_CLK 0x0D8 0x340 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SD2_CMD_GPIO2_IO14 0x0D8 0x340 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_CMD_CCMSRCGPCMIX_OBSERVE1 0x0D8 0x340 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SD2_DATA0_USDHC2_DATA0 0x0DC 0x344 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD2_DATA0_SAI5_RX_DATA0 0x0DC 0x344 0x4D4 0x1 0x1 +#define MX8MN_IOMUXC_SD2_DATA0_I2C4_SDA 0x0DC 0x344 0x58C 0x2 0x1 +#define MX8MN_IOMUXC_SD2_DATA0_UART2_DCE_RX 0x0DC 0x344 0x4FC 0x3 0x6 +#define MX8MN_IOMUXC_SD2_DATA0_UART2_DTE_TX 0x0DC 0x344 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SD2_DATA0_PDM_BIT_STREAM0 0x0DC 0x344 0x534 0x4 0x2 +#define MX8MN_IOMUXC_SD2_DATA0_GPIO2_IO15 0x0DC 0x344 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_DATA0_CCMSRCGPCMIX_OBSERVE2 0x0DC 0x344 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SD2_DATA1_USDHC2_DATA1 0x0E0 0x348 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD2_DATA1_SAI5_TX_SYNC 0x0E0 0x348 0x4EC 0x1 0x1 +#define MX8MN_IOMUXC_SD2_DATA1_I2C4_SCL 0x0E0 0x348 0x5D4 0x2 0x1 +#define MX8MN_IOMUXC_SD2_DATA1_UART2_DCE_TX 
0x0E0 0x348 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SD2_DATA1_UART2_DTE_RX 0x0E0 0x348 0x4FC 0x3 0x7 +#define MX8MN_IOMUXC_SD2_DATA1_PDM_BIT_STREAM1 0x0E0 0x348 0x538 0x4 0x4 +#define MX8MN_IOMUXC_SD2_DATA1_GPIO2_IO16 0x0E0 0x348 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_DATA1_CCMSRCGPCMIX_WAIT 0x0E0 0x348 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SD2_DATA2_USDHC2_DATA2 0x0E4 0x34C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD2_DATA2_SAI5_TX_BCLK 0x0E4 0x34C 0x4E8 0x1 0x1 +#define MX8MN_IOMUXC_SD2_DATA2_ECSPI2_SS0 0x0E4 0x34C 0x570 0x2 0x2 +#define MX8MN_IOMUXC_SD2_DATA2_SPDIF1_OUT 0x0E4 0x34C 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SD2_DATA2_PDM_BIT_STREAM2 0x0E4 0x34C 0x53C 0x4 0x4 +#define MX8MN_IOMUXC_SD2_DATA2_GPIO2_IO17 0x0E4 0x34C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_DATA2_CCMSRCGPCMIX_STOP 0x0E4 0x34C 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SD2_DATA3_USDHC2_DATA3 0x0E8 0x350 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD2_DATA3_SAI5_TX_DATA0 0x0E8 0x350 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SD2_DATA3_ECSPI2_MISO 0x0E8 0x350 0x578 0x2 0x1 +#define MX8MN_IOMUXC_SD2_DATA3_SPDIF1_IN 0x0E8 0x350 0x5CC 0x3 0x2 +#define MX8MN_IOMUXC_SD2_DATA3_PDM_BIT_STREAM3 0x0E8 0x350 0x540 0x4 0x4 +#define MX8MN_IOMUXC_SD2_DATA3_GPIO2_IO18 0x0E8 0x350 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_DATA3_CCMSRCGPCMIX_EARLY_RESET 0x0E8 0x350 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SD2_RESET_B_USDHC2_RESET_B 0x0EC 0x354 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD2_RESET_B_GPIO2_IO19 0x0EC 0x354 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_RESET_B_CCMSRCGPCMIX_SYSTEM_RESET 0x0EC 0x354 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SD2_WP_USDHC2_WP 0x0F0 0x358 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SD2_WP_GPIO2_IO20 0x0F0 0x358 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SD2_WP_CORESIGHT_EVENTI 0x0F0 0x358 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_ALE_RAWNAND_ALE 0x0F4 0x35C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_ALE_QSPI_A_SCLK 0x0F4 0x35C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_ALE_PDM_BIT_STREAM0 0x0F4 0x35C 0x534 0x3 0x3 +#define MX8MN_IOMUXC_NAND_ALE_UART3_DCE_RX 0x0F4 0x35C 0x504 0x4 0x6 +#define MX8MN_IOMUXC_NAND_ALE_UART3_DTE_TX 0x0F4 0x35C 0x000 0x4 0x0 +#define MX8MN_IOMUXC_NAND_ALE_GPIO3_IO0 0x0F4 0x35C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_ALE_CORESIGHT_TRACE_CLK 0x0F4 0x35C 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_CE0_B_RAWNAND_CE0_B 0x0F8 0x360 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_CE0_B_QSPI_A_SS0_B 0x0F8 0x360 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_CE0_B_PDM_BIT_STREAM1 0x0F8 0x360 0x538 0x3 0x5 +#define MX8MN_IOMUXC_NAND_CE0_B_UART3_DCE_TX 0x0F8 0x360 0x000 0x4 0x0 +#define MX8MN_IOMUXC_NAND_CE0_B_UART3_DTE_RX 0x0F8 0x360 0x504 0x4 0x7 +#define MX8MN_IOMUXC_NAND_CE0_B_GPIO3_IO1 0x0F8 0x360 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_CE0_B_CORESIGHT_TRACE_CTL 0x0F8 0x360 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_CE1_B_RAWNAND_CE1_B 0x0FC 0x364 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_CE1_B_QSPI_A_SS1_B 0x0FC 0x364 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_CE1_B_USDHC3_STROBE 0x0FC 0x364 0x59C 0x2 0x0 +#define MX8MN_IOMUXC_NAND_CE1_B_PDM_BIT_STREAM0 0x0FC 0x364 0x534 0x3 0x4 +#define MX8MN_IOMUXC_NAND_CE1_B_I2C4_SCL 0x0FC 0x364 0x5D4 0x4 0x2 +#define MX8MN_IOMUXC_NAND_CE1_B_GPIO3_IO2 0x0FC 0x364 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_CE1_B_CORESIGHT_TRACE0 0x0FC 0x364 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_CE2_B_RAWNAND_CE2_B 0x100 0x368 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_CE2_B_QSPI_B_SS0_B 0x100 0x368 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_CE2_B_USDHC3_DATA5 0x100 0x368 0x550 0x2 0x0 +#define MX8MN_IOMUXC_NAND_CE2_B_PDM_BIT_STREAM1 0x100 0x368 0x538 
0x3 0x6 +#define MX8MN_IOMUXC_NAND_CE2_B_I2C4_SDA 0x100 0x368 0x58C 0x4 0x2 +#define MX8MN_IOMUXC_NAND_CE2_B_GPIO3_IO3 0x100 0x368 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_CE2_B_CORESIGHT_TRACE1 0x100 0x368 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_CE3_B_RAWNAND_CE3_B 0x104 0x36C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_CE3_B_QSPI_B_SS1_B 0x104 0x36C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_CE3_B_USDHC3_DATA6 0x104 0x36C 0x584 0x2 0x0 +#define MX8MN_IOMUXC_NAND_CE3_B_PDM_BIT_STREAM2 0x104 0x36C 0x53C 0x3 0x5 +#define MX8MN_IOMUXC_NAND_CE3_B_I2C3_SDA 0x104 0x36C 0x5BC 0x4 0x2 +#define MX8MN_IOMUXC_NAND_CE3_B_GPIO3_IO4 0x104 0x36C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_CE3_B_CORESIGHT_TRACE2 0x104 0x36C 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_CLE_RAWNAND_CLE 0x108 0x370 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_CLE_QSPI_B_SCLK 0x108 0x370 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_CLE_USDHC3_DATA7 0x108 0x370 0x54C 0x2 0x0 +#define MX8MN_IOMUXC_NAND_CLE_GPIO3_IO5 0x108 0x370 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_CLE_CORESIGHT_TRACE3 0x108 0x370 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_DATA00_RAWNAND_DATA00 0x10C 0x374 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_DATA00_QSPI_A_DATA0 0x10C 0x374 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_DATA00_PDM_BIT_STREAM2 0x10C 0x374 0x53C 0x3 0x6 +#define MX8MN_IOMUXC_NAND_DATA00_UART4_DCE_RX 0x10C 0x374 0x50C 0x4 0x6 +#define MX8MN_IOMUXC_NAND_DATA00_UART4_DTE_TX 0x10C 0x374 0x000 0x4 0x0 +#define MX8MN_IOMUXC_NAND_DATA00_GPIO3_IO6 0x10C 0x374 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_DATA00_CORESIGHT_TRACE4 0x10C 0x374 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_DATA01_RAWNAND_DATA01 0x110 0x378 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_DATA01_QSPI_A_DATA1 0x110 0x378 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_DATA01_PDM_BIT_STREAM3 0x110 0x378 0x540 0x3 0x5 +#define MX8MN_IOMUXC_NAND_DATA01_UART4_DCE_TX 0x110 0x378 0x000 0x4 0x0 +#define MX8MN_IOMUXC_NAND_DATA01_UART4_DTE_RX 0x110 0x378 0x50C 0x4 0x7 +#define MX8MN_IOMUXC_NAND_DATA01_GPIO3_IO7 0x110 0x378 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_DATA01_CORESIGHT_TRACE5 0x110 0x378 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_DATA02_RAWNAND_DATA02 0x114 0x37C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_DATA02_QSPI_A_DATA2 0x114 0x37C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_DATA02_USDHC3_CD_B 0x114 0x37C 0x598 0x2 0x0 +#define MX8MN_IOMUXC_NAND_DATA02_I2C4_SDA 0x114 0x37C 0x58C 0x4 0x3 +#define MX8MN_IOMUXC_NAND_DATA02_GPIO3_IO8 0x114 0x37C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_DATA02_CORESIGHT_TRACE6 0x114 0x37C 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_DATA03_RAWNAND_DATA03 0x118 0x380 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_DATA03_QSPI_A_DATA3 0x118 0x380 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_DATA03_USDHC3_WP 0x118 0x380 0x5B8 0x2 0x0 +#define MX8MN_IOMUXC_NAND_DATA03_GPIO3_IO9 0x118 0x380 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_DATA03_CORESIGHT_TRACE7 0x118 0x380 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_DATA04_RAWNAND_DATA04 0x11C 0x384 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_DATA04_QSPI_B_DATA0 0x11C 0x384 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_DATA04_USDHC3_DATA0 0x11C 0x384 0x5B4 0x2 0x0 +#define MX8MN_IOMUXC_NAND_DATA04_GPIO3_IO10 0x11C 0x384 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_DATA04_CORESIGHT_TRACE8 0x11C 0x384 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_DATA05_RAWNAND_DATA05 0x120 0x388 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_DATA05_QSPI_B_DATA1 0x120 0x388 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_DATA05_USDHC3_DATA1 0x120 0x388 0x5B0 0x2 0x0 +#define MX8MN_IOMUXC_NAND_DATA05_GPIO3_IO11 0x120 
0x388 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_DATA05_CORESIGHT_TRACE9 0x120 0x388 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_DATA06_RAWNAND_DATA06 0x124 0x38C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_DATA06_QSPI_B_DATA2 0x124 0x38C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_DATA06_USDHC3_DATA2 0x124 0x38C 0x5E4 0x2 0x0 +#define MX8MN_IOMUXC_NAND_DATA06_GPIO3_IO12 0x124 0x38C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_DATA06_CORESIGHT_TRACE10 0x124 0x38C 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_DATA07_RAWNAND_DATA07 0x128 0x390 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_DATA07_QSPI_B_DATA3 0x128 0x390 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_DATA07_USDHC3_DATA3 0x128 0x390 0x5E0 0x2 0x0 +#define MX8MN_IOMUXC_NAND_DATA07_GPIO3_IO13 0x128 0x390 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_DATA07_CORESIGHT_TRACE11 0x128 0x390 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_DQS_RAWNAND_DQS 0x12C 0x394 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_DQS_QSPI_A_DQS 0x12C 0x394 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_DQS_PDM_CLK 0x12C 0x394 0x000 0x3 0x0 +#define MX8MN_IOMUXC_NAND_DQS_I2C3_SCL 0x12C 0x394 0x588 0x4 0x2 +#define MX8MN_IOMUXC_NAND_DQS_GPIO3_IO14 0x12C 0x394 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_DQS_CORESIGHT_TRACE12 0x12C 0x394 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_RE_B_RAWNAND_RE_B 0x130 0x398 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_RE_B_QSPI_B_DQS 0x130 0x398 0x000 0x1 0x0 +#define MX8MN_IOMUXC_NAND_RE_B_USDHC3_DATA4 0x130 0x398 0x558 0x2 0x0 +#define MX8MN_IOMUXC_NAND_RE_B_PDM_BIT_STREAM1 0x130 0x398 0x538 0x3 0x7 +#define MX8MN_IOMUXC_NAND_RE_B_GPIO3_IO15 0x130 0x398 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_RE_B_CORESIGHT_TRACE13 0x130 0x398 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_READY_B_RAWNAND_READY_B 0x134 0x39C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_READY_B_USDHC3_RESET_B 0x134 0x39C 0x000 0x2 0x0 +#define MX8MN_IOMUXC_NAND_READY_B_PDM_BIT_STREAM3 0x134 0x39C 0x540 0x3 0x6 +#define MX8MN_IOMUXC_NAND_READY_B_I2C3_SCL 0x134 0x39C 0x588 0x4 0x3 +#define MX8MN_IOMUXC_NAND_READY_B_GPIO3_IO16 0x134 0x39C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_READY_B_CORESIGHT_TRACE14 0x134 0x39C 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_WE_B_RAWNAND_WE_B 0x138 0x3A0 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_WE_B_USDHC3_CLK 0x138 0x3A0 0x5A0 0x2 0x0 +#define MX8MN_IOMUXC_NAND_WE_B_I2C3_SDA 0x138 0x3A0 0x5BC 0x4 0x3 +#define MX8MN_IOMUXC_NAND_WE_B_GPIO3_IO17 0x138 0x3A0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_WE_B_CORESIGHT_TRACE15 0x138 0x3A0 0x000 0x6 0x0 +#define MX8MN_IOMUXC_NAND_WP_B_RAWNAND_WP_B 0x13C 0x3A4 0x000 0x0 0x0 +#define MX8MN_IOMUXC_NAND_WP_B_USDHC3_CMD 0x13C 0x3A4 0x5DC 0x2 0x0 +#define MX8MN_IOMUXC_NAND_WP_B_I2C4_SDA 0x13C 0x3A4 0x58C 0x4 0x4 +#define MX8MN_IOMUXC_NAND_WP_B_GPIO3_IO18 0x13C 0x3A4 0x000 0x5 0x0 +#define MX8MN_IOMUXC_NAND_WP_B_CORESIGHT_EVENTO 0x13C 0x3A4 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SAI5_RXFS_SAI5_RX_SYNC 0x140 0x3A8 0x4E4 0x0 0x0 +#define MX8MN_IOMUXC_SAI5_RXFS_GPIO3_IO19 0x140 0x3A8 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI5_RXC_SAI5_RX_BCLK 0x144 0x3AC 0x4D0 0x0 0x0 +#define MX8MN_IOMUXC_SAI5_RXC_PDM_CLK 0x144 0x3AC 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI5_RXC_GPIO3_IO20 0x144 0x3AC 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI5_RXD0_SAI5_RX_DATA0 0x148 0x3B0 0x4D4 0x0 0x0 +#define MX8MN_IOMUXC_SAI5_RXD0_PDM_BIT_STREAM0 0x148 0x3B0 0x534 0x4 0x0 +#define MX8MN_IOMUXC_SAI5_RXD0_GPIO3_IO21 0x148 0x3B0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI5_RXD1_SAI5_RX_DATA1 0x14C 0x3B4 0x4D8 0x0 0x0 +#define MX8MN_IOMUXC_SAI5_RXD1_SAI5_TX_SYNC 0x14C 0x3B4 0x4EC 0x3 0x0 +#define 
MX8MN_IOMUXC_SAI5_RXD1_PDM_BIT_STREAM1 0x14C 0x3B4 0x538 0x4 0x0 +#define MX8MN_IOMUXC_SAI5_RXD1_GPIO3_IO22 0x14C 0x3B4 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI5_RXD2_SAI5_RX_DATA2 0x150 0x3B8 0x4DC 0x0 0x0 +#define MX8MN_IOMUXC_SAI5_RXD2_SAI5_TX_BCLK 0x150 0x3B8 0x4E8 0x3 0x0 +#define MX8MN_IOMUXC_SAI5_RXD2_PDM_BIT_STREAM2 0x150 0x3B8 0x53C 0x4 0x0 +#define MX8MN_IOMUXC_SAI5_RXD2_GPIO3_IO23 0x150 0x3B8 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI5_RXD3_SAI5_RX_DATA3 0x154 0x3BC 0x4E0 0x0 0x0 +#define MX8MN_IOMUXC_SAI5_RXD3_SAI5_TX_DATA0 0x154 0x3BC 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SAI5_RXD3_PDM_BIT_STREAM3 0x154 0x3BC 0x540 0x4 0x0 +#define MX8MN_IOMUXC_SAI5_RXD3_GPIO3_IO24 0x154 0x3BC 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI5_MCLK_SAI5_MCLK 0x158 0x3C0 0x594 0x0 0x0 +#define MX8MN_IOMUXC_SAI5_MCLK_GPIO3_IO25 0x158 0x3C0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI2_RXFS_SAI2_RX_SYNC 0x1B0 0x418 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI2_RXFS_SAI5_TX_SYNC 0x1B0 0x418 0x4EC 0x1 0x2 +#define MX8MN_IOMUXC_SAI2_RXFS_SAI5_TX_DATA1 0x1B0 0x418 0x000 0x2 0x0 +#define MX8MN_IOMUXC_SAI2_RXFS_SAI2_RX_DATA1 0x1B0 0x418 0x5AC 0x3 0x0 +#define MX8MN_IOMUXC_SAI2_RXFS_UART1_DCE_TX 0x1B0 0x418 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI2_RXFS_UART1_DTE_RX 0x1B0 0x418 0x4F4 0x4 0x2 +#define MX8MN_IOMUXC_SAI2_RXFS_GPIO4_IO21 0x1B0 0x418 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI2_RXFS_PDM_BIT_STREAM2 0x1B0 0x418 0x53C 0x6 0x7 +#define MX8MN_IOMUXC_SAI2_RXC_SAI2_RX_BCLK 0x1B4 0x41C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI2_RXC_SAI5_TX_BCLK 0x1B4 0x41C 0x4E8 0x1 0x2 +#define MX8MN_IOMUXC_SAI2_RXC_UART1_DCE_RX 0x1B4 0x41C 0x4F4 0x4 0x3 +#define MX8MN_IOMUXC_SAI2_RXC_UART1_DTE_TX 0x1B4 0x41C 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI2_RXC_GPIO4_IO22 0x1B4 0x41C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI2_RXC_PDM_BIT_STREAM1 0x1B4 0x41C 0x538 0x6 0x8 +#define MX8MN_IOMUXC_SAI2_RXD0_SAI2_RX_DATA0 0x1B8 0x420 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI2_RXD0_SAI5_TX_DATA0 0x1B8 0x420 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SAI2_RXD0_SAI2_TX_DATA1 0x1B8 0x420 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SAI2_RXD0_UART1_DCE_RTS_B 0x1B8 0x420 0x4F0 0x4 0x2 +#define MX8MN_IOMUXC_SAI2_RXD0_UART1_DTE_CTS_B 0x1B8 0x420 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI2_RXD0_GPIO4_IO23 0x1B8 0x420 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI2_RXD0_PDM_BIT_STREAM3 0x1B8 0x420 0x540 0x6 0x7 +#define MX8MN_IOMUXC_SAI2_TXFS_SAI2_TX_SYNC 0x1BC 0x424 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI2_TXFS_SAI5_TX_DATA1 0x1BC 0x424 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SAI2_TXFS_SAI2_TX_DATA1 0x1BC 0x424 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SAI2_TXFS_UART1_DCE_CTS_B 0x1BC 0x424 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI2_TXFS_UART1_DTE_RTS_B 0x1BC 0x424 0x4F0 0x4 0x3 +#define MX8MN_IOMUXC_SAI2_TXFS_GPIO4_IO24 0x1BC 0x424 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI2_TXFS_PDM_BIT_STREAM2 0x1BC 0x424 0x53C 0x6 0x8 +#define MX8MN_IOMUXC_SAI2_TXC_SAI2_TX_BCLK 0x1C0 0x428 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI2_TXC_SAI5_TX_DATA2 0x1C0 0x428 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SAI2_TXC_GPIO4_IO25 0x1C0 0x428 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI2_TXC_PDM_BIT_STREAM1 0x1C0 0x428 0x538 0x6 0x9 +#define MX8MN_IOMUXC_SAI2_TXD0_SAI2_TX_DATA0 0x1C4 0x42C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI2_TXD0_SAI5_TX_DATA3 0x1C4 0x42C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SAI2_TXD0_GPIO4_IO26 0x1C4 0x42C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI2_TXD0_CCMSRCGPCMIX_BOOT_MODE4 0x1C4 0x42C 0x540 0x6 0x8 +#define MX8MN_IOMUXC_SAI2_MCLK_SAI2_MCLK 0x1C8 0x430 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI2_MCLK_SAI5_MCLK 0x1C8 0x430 0x594 
0x1 0x2 +#define MX8MN_IOMUXC_SAI2_MCLK_GPIO4_IO27 0x1C8 0x430 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI2_MCLK_SAI3_MCLK 0x1C8 0x430 0x5C0 0x6 0x1 +#define MX8MN_IOMUXC_SAI3_RXFS_SAI3_RX_SYNC 0x1CC 0x434 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI3_RXFS_GPT1_CAPTURE1 0x1CC 0x434 0x5F0 0x1 0x0 +#define MX8MN_IOMUXC_SAI3_RXFS_SAI5_RX_SYNC 0x1CC 0x434 0x4E4 0x2 0x2 +#define MX8MN_IOMUXC_SAI3_RXFS_SAI3_RX_DATA1 0x1CC 0x434 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SAI3_RXFS_SPDIF1_IN 0x1CC 0x434 0x5CC 0x4 0x3 +#define MX8MN_IOMUXC_SAI3_RXFS_GPIO4_IO28 0x1CC 0x434 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI3_RXFS_PDM_BIT_STREAM0 0x1CC 0x434 0x534 0x6 0x5 +#define MX8MN_IOMUXC_SAI3_RXC_SAI3_RX_BCLK 0x1D0 0x438 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI3_RXC_GPT1_CLK 0x1D0 0x438 0x5E8 0x1 0x0 +#define MX8MN_IOMUXC_SAI3_RXC_SAI5_RX_BCLK 0x1D0 0x438 0x4D0 0x2 0x2 +#define MX8MN_IOMUXC_SAI3_RXC_SAI2_RX_DATA1 0x1D0 0x438 0x5AC 0x3 0x2 +#define MX8MN_IOMUXC_SAI3_RXC_UART2_DCE_CTS_B 0x1D0 0x438 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI3_RXC_UART2_DTE_RTS_B 0x1D0 0x438 0x4F8 0x4 0x2 +#define MX8MN_IOMUXC_SAI3_RXC_GPIO4_IO29 0x1D0 0x438 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI3_RXC_PDM_CLK 0x1D0 0x438 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SAI3_RXD_SAI3_RX_DATA0 0x1D4 0x43C 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI3_RXD_GPT1_COMPARE1 0x1D4 0x43C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SAI3_RXD_SAI5_RX_DATA0 0x1D4 0x43C 0x4D4 0x2 0x2 +#define MX8MN_IOMUXC_SAI3_RXD_SAI3_TX_DATA1 0x1D4 0x43C 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SAI3_RXD_UART2_DCE_RTS_B 0x1D4 0x43C 0x4F8 0x4 0x3 +#define MX8MN_IOMUXC_SAI3_RXD_UART2_DTE_CTS_B 0x1D4 0x43C 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI3_RXD_GPIO4_IO30 0x1D4 0x43C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI3_RXD_PDM_BIT_STREAM1 0x1D4 0x43C 0x538 0x6 0x10 +#define MX8MN_IOMUXC_SAI3_TXFS_SAI3_TX_SYNC 0x1D8 0x440 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI3_TXFS_GPT1_CAPTURE2 0x1D8 0x440 0x5EC 0x1 0x0 +#define MX8MN_IOMUXC_SAI3_TXFS_SAI5_RX_DATA1 0x1D8 0x440 0x4D8 0x2 0x1 +#define MX8MN_IOMUXC_SAI3_TXFS_SAI3_TX_DATA1 0x1D8 0x440 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SAI3_TXFS_UART2_DCE_RX 0x1D8 0x440 0x4FC 0x4 0x2 +#define MX8MN_IOMUXC_SAI3_TXFS_UART2_DTE_TX 0x1D8 0x440 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI3_TXFS_GPIO4_IO31 0x1D8 0x440 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI3_TXFS_PDM_BIT_STREAM3 0x1D8 0x440 0x540 0x6 0x9 +#define MX8MN_IOMUXC_SAI3_TXC_SAI3_TX_BCLK 0x1DC 0x444 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI3_TXC_GPT1_COMPARE2 0x1DC 0x444 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SAI3_TXC_SAI5_RX_DATA2 0x1DC 0x444 0x4DC 0x2 0x1 +#define MX8MN_IOMUXC_SAI3_TXC_SAI2_TX_DATA1 0x1DC 0x444 0x000 0x3 0x0 +#define MX8MN_IOMUXC_SAI3_TXC_UART2_DCE_TX 0x1DC 0x444 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI3_TXC_UART2_DTE_RX 0x1DC 0x444 0x4FC 0x4 0x3 +#define MX8MN_IOMUXC_SAI3_TXC_GPIO5_IO0 0x1DC 0x444 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI3_TXC_PDM_BIT_STREAM2 0x1DC 0x444 0x53C 0x6 0x9 +#define MX8MN_IOMUXC_SAI3_TXD_SAI3_TX_DATA0 0x1E0 0x448 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SAI3_TXD_GPT1_COMPARE3 0x1E0 0x448 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SAI3_TXD_SAI5_RX_DATA3 0x1E0 0x448 0x4E0 0x2 0x1 +#define MX8MN_IOMUXC_SAI3_TXD_SPDIF1_EXT_CLK 0x1E0 0x448 0x568 0x4 0x2 +#define MX8MN_IOMUXC_SAI3_TXD_GPIO5_IO1 0x1E0 0x448 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI3_TXD_CCMSRCGPCMIX_BOOT_MODE5 0x1E0 0x448 0x000 0x6 0x0 +#define MX8MN_IOMUXC_SAI3_MCLK_SAI3_MCLK 0x1E4 0x44C 0x5C0 0x0 0x0 +#define MX8MN_IOMUXC_SAI3_MCLK_PWM4_OUT 0x1E4 0x44C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SAI3_MCLK_SAI5_MCLK 0x1E4 0x44C 0x594 0x2 0x3 +#define 
MX8MN_IOMUXC_SAI3_MCLK_SPDIF1_OUT 0x1E4 0x44C 0x000 0x4 0x0 +#define MX8MN_IOMUXC_SAI3_MCLK_GPIO5_IO2 0x1E4 0x44C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SAI3_MCLK_SPDIF1_IN 0x1E4 0x44C 0x5CC 0x6 0x4 +#define MX8MN_IOMUXC_SPDIF_TX_SPDIF1_OUT 0x1E8 0x450 0x000 0x0 0x0 +#define MX8MN_IOMUXC_SPDIF_TX_PWM3_OUT 0x1E8 0x450 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SPDIF_TX_GPIO5_IO3 0x1E8 0x450 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SPDIF_RX_SPDIF1_IN 0x1EC 0x454 0x5CC 0x0 0x0 +#define MX8MN_IOMUXC_SPDIF_RX_PWM2_OUT 0x1EC 0x454 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SPDIF_RX_GPIO5_IO4 0x1EC 0x454 0x000 0x5 0x0 +#define MX8MN_IOMUXC_SPDIF_EXT_CLK_SPDIF1_EXT_CLK 0x1F0 0x458 0x568 0x0 0x0 +#define MX8MN_IOMUXC_SPDIF_EXT_CLK_PWM1_OUT 0x1F0 0x458 0x000 0x1 0x0 +#define MX8MN_IOMUXC_SPDIF_EXT_CLK_GPIO5_IO5 0x1F0 0x458 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ECSPI1_SCLK_ECSPI1_SCLK 0x1F4 0x45C 0x5D8 0x0 0x0 +#define MX8MN_IOMUXC_ECSPI1_SCLK_UART3_DCE_RX 0x1F4 0x45C 0x504 0x1 0x0 +#define MX8MN_IOMUXC_ECSPI1_SCLK_UART3_DTE_TX 0x1F4 0x45C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_ECSPI1_SCLK_I2C1_SCL 0x1F4 0x45C 0x55C 0x2 0x2 +#define MX8MN_IOMUXC_ECSPI1_SCLK_SAI5_RX_SYNC 0x1F4 0x45C 0x4DC 0x3 0x2 +#define MX8MN_IOMUXC_ECSPI1_SCLK_GPIO5_IO6 0x1F4 0x45C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ECSPI1_MOSI_ECSPI1_MOSI 0x1F8 0x460 0x5A8 0x0 0x0 +#define MX8MN_IOMUXC_ECSPI1_MOSI_UART3_DCE_TX 0x1F8 0x460 0x000 0x1 0x0 +#define MX8MN_IOMUXC_ECSPI1_MOSI_UART3_DTE_RX 0x1F8 0x460 0x504 0x1 0x1 +#define MX8MN_IOMUXC_ECSPI1_MOSI_I2C1_SDA 0x1F8 0x460 0x56C 0x2 0x2 +#define MX8MN_IOMUXC_ECSPI1_MOSI_SAI5_RX_BCLK 0x1F8 0x460 0x4D0 0x3 0x3 +#define MX8MN_IOMUXC_ECSPI1_MOSI_GPIO5_IO7 0x1F8 0x460 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ECSPI1_MISO_ECSPI1_MISO 0x1FC 0x464 0x5C4 0x0 0x0 +#define MX8MN_IOMUXC_ECSPI1_MISO_UART3_DCE_CTS_B 0x1FC 0x464 0x000 0x1 0x0 +#define MX8MN_IOMUXC_ECSPI1_MISO_UART3_DTE_RTS_B 0x1FC 0x464 0x500 0x1 0x0 +#define MX8MN_IOMUXC_ECSPI1_MISO_I2C2_SCL 0x1FC 0x464 0x5D0 0x2 0x2 +#define MX8MN_IOMUXC_ECSPI1_MISO_SAI5_RX_DATA0 0x1FC 0x464 0x4D4 0x3 0x3 +#define MX8MN_IOMUXC_ECSPI1_MISO_GPIO5_IO8 0x1FC 0x464 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ECSPI1_SS0_ECSPI1_SS0 0x200 0x468 0x564 0x0 0x0 +#define MX8MN_IOMUXC_ECSPI1_SS0_UART3_DCE_RTS_B 0x200 0x468 0x500 0x1 0x1 +#define MX8MN_IOMUXC_ECSPI1_SS0_UART3_DTE_CTS_B 0x200 0x468 0x000 0x1 0x0 +#define MX8MN_IOMUXC_ECSPI1_SS0_I2C2_SDA 0x200 0x468 0x560 0x2 0x2 +#define MX8MN_IOMUXC_ECSPI1_SS0_SAI5_RX_DATA1 0x200 0x468 0x4D8 0x3 0x2 +#define MX8MN_IOMUXC_ECSPI1_SS0_SAI5_TX_SYNC 0x200 0x468 0x4EC 0x4 0x3 +#define MX8MN_IOMUXC_ECSPI1_SS0_GPIO5_IO9 0x200 0x468 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ECSPI2_SCLK_ECSPI2_SCLK 0x204 0x46C 0x580 0x0 0x0 +#define MX8MN_IOMUXC_ECSPI2_SCLK_UART4_DCE_RX 0x204 0x46C 0x50C 0x1 0x0 +#define MX8MN_IOMUXC_ECSPI2_SCLK_UART4_DTE_TX 0x204 0x46C 0x000 0x1 0x0 +#define MX8MN_IOMUXC_ECSPI2_SCLK_I2C3_SCL 0x204 0x46C 0x588 0x2 0x4 +#define MX8MN_IOMUXC_ECSPI2_SCLK_SAI5_RX_DATA2 0x204 0x46C 0x000 0x3 0x0 +#define MX8MN_IOMUXC_ECSPI2_SCLK_SAI5_TX_BCLK 0x204 0x46C 0x4E8 0x4 0x3 +#define MX8MN_IOMUXC_ECSPI2_SCLK_GPIO5_IO10 0x204 0x46C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_ECSPI2_MOSI_ECSPI2_MOSI 0x208 0x470 0x590 0x0 0x0 +#define MX8MN_IOMUXC_ECSPI2_MOSI_UART4_DCE_TX 0x208 0x470 0x000 0x1 0x0 +#define MX8MN_IOMUXC_ECSPI2_MOSI_UART4_DTE_RX 0x208 0x470 0x50C 0x1 0x1 +#define MX8MN_IOMUXC_ECSPI2_MOSI_I2C3_SDA 0x208 0x470 0x5BC 0x2 0x4 +#define MX8MN_IOMUXC_ECSPI2_MOSI_SAI5_RX_DATA3 0x208 0x470 0x4E0 0x3 0x2 +#define MX8MN_IOMUXC_ECSPI2_MOSI_SAI5_TX_DATA0 0x208 0x470 0x000 0x4 
0x0
+#define MX8MN_IOMUXC_ECSPI2_MOSI_GPIO5_IO11 0x208 0x470 0x000 0x5 0x0
+#define MX8MN_IOMUXC_ECSPI2_MISO_ECSPI2_MISO 0x20C 0x474 0x578 0x0 0x0
+#define MX8MN_IOMUXC_ECSPI2_MISO_UART4_DCE_CTS_B 0x20C 0x474 0x000 0x1 0x0
+#define MX8MN_IOMUXC_ECSPI2_MISO_UART4_DTE_RTS_B 0x20C 0x474 0x508 0x1 0x0
+#define MX8MN_IOMUXC_ECSPI2_MISO_I2C4_SCL 0x20C 0x474 0x5D4 0x2 0x3
+#define MX8MN_IOMUXC_ECSPI2_MISO_SAI5_MCLK 0x20C 0x474 0x594 0x3 0x4
+#define MX8MN_IOMUXC_ECSPI2_MISO_GPIO5_IO12 0x20C 0x474 0x000 0x5 0x0
+#define MX8MN_IOMUXC_ECSPI2_SS0_ECSPI2_SS0 0x210 0x478 0x570 0x0 0x0
+#define MX8MN_IOMUXC_ECSPI2_SS0_UART4_DCE_RTS_B 0x210 0x478 0x508 0x1 0x1
+#define MX8MN_IOMUXC_ECSPI2_SS0_UART4_DTE_CTS_B 0x210 0x478 0x000 0x1 0x0
+#define MX8MN_IOMUXC_ECSPI2_SS0_I2C4_SDA 0x210 0x478 0x58C 0x2 0x5
+#define MX8MN_IOMUXC_ECSPI2_SS0_GPIO5_IO13 0x210 0x478 0x000 0x5 0x0
+#define MX8MN_IOMUXC_I2C1_SCL_I2C1_SCL 0x214 0x47C 0x55C 0x0 0x0
+#define MX8MN_IOMUXC_I2C1_SCL_ENET1_MDC 0x214 0x47C 0x000 0x1 0x0
+#define MX8MN_IOMUXC_I2C1_SCL_ECSPI1_SCLK 0x214 0x47C 0x5D8 0x3 0x1
+#define MX8MN_IOMUXC_I2C1_SCL_GPIO5_IO14 0x214 0x47C 0x000 0x5 0x0
+#define MX8MN_IOMUXC_I2C1_SDA_I2C1_SDA 0x218 0x480 0x56C 0x0 0x0
+#define MX8MN_IOMUXC_I2C1_SDA_ENET1_MDIO 0x218 0x480 0x4C0 0x1 0x2
+#define MX8MN_IOMUXC_I2C1_SDA_ECSPI1_MOSI 0x218 0x480 0x5A8 0x3 0x1
+#define MX8MN_IOMUXC_I2C1_SDA_GPIO5_IO15 0x218 0x480 0x000 0x5 0x0
+#define MX8MN_IOMUXC_I2C2_SCL_I2C2_SCL 0x21C 0x484 0x5D0 0x0 0x0
+#define MX8MN_IOMUXC_I2C2_SCL_ENET1_1588_EVENT1_IN 0x21C 0x484 0x000 0x1 0x0
+#define MX8MN_IOMUXC_I2C2_SCL_USDHC3_CD_B 0x21C 0x484 0x598 0x2 0x1
+#define MX8MN_IOMUXC_I2C2_SCL_ECSPI1_MISO 0x21C 0x484 0x5C4 0x3 0x1
+#define MX8MN_IOMUXC_I2C2_SCL_GPIO5_IO16 0x21C 0x484 0x000 0x5 0x0
+#define MX8MN_IOMUXC_I2C2_SDA_I2C2_SDA 0x220 0x488 0x560 0x0 0x0
+#define MX8MN_IOMUXC_I2C2_SDA_ENET1_1588_EVENT1_OUT 0x220 0x488 0x000 0x1 0x0
+#define MX8MN_IOMUXC_I2C2_SDA_USDHC3_WP 0x220 0x488 0x5B8 0x2 0x1
+#define MX8MN_IOMUXC_I2C2_SDA_ECSPI1_SS0 0x220 0x488 0x564 0x3 0x1
+#define MX8MN_IOMUXC_I2C2_SDA_GPIO5_IO17 0x220 0x488 0x000 0x5 0x0
+#define MX8MN_IOMUXC_I2C3_SCL_I2C3_SCL 0x224 0x48C 0x588 0x0 0x0
+#define MX8MN_IOMUXC_I2C3_SCL_PWM4_OUT 0x224 0x48C 0x000 0x1 0x0
+#define MX8MN_IOMUXC_I2C3_SCL_GPT2_CLK 0x224 0x48C 0x000 0x2 0x0
+#define MX8MN_IOMUXC_I2C3_SCL_ECSPI2_SCLK 0x224 0x48C 0x580 0x3 0x2
+#define MX8MN_IOMUXC_I2C3_SCL_GPIO5_IO18 0x224 0x48C 0x000 0x5 0x0
+#define MX8MN_IOMUXC_I2C3_SDA_I2C3_SDA 0x228 0x490 0x5BC 0x0 0x0
+#define MX8MN_IOMUXC_I2C3_SDA_PWM3_OUT 0x228 0x490 0x000 0x1 0x0
+#define MX8MN_IOMUXC_I2C3_SDA_GPT3_CLK 0x228 0x490 0x000 0x2 0x0
+#define MX8MN_IOMUXC_I2C3_SDA_ECSPI2_MOSI 0x228 0x490 0x590 0x3 0x2
+#define MX8MN_IOMUXC_I2C3_SDA_GPIO5_IO19 0x228 0x490 0x000 0x5 0x0
+#define MX8MN_IOMUXC_I2C4_SCL_I2C4_SCL 0x22C 0x494 0x5D4 0x0 0x0
+#define MX8MN_IOMUXC_I2C4_SCL_PWM2_OUT 0x22C 0x494 0x000 0x1 0x0
+#define MX8MN_IOMUXC_I2C4_SCL_ECSPI2_MISO 0x22C 0x494 0x578 0x3 0x2
+#define MX8MN_IOMUXC_I2C4_SCL_GPIO5_IO20 0x22C 0x494 0x000 0x5 0x0
+#define MX8MN_IOMUXC_I2C4_SDA_I2C4_SDA 0x230 0x498 0x58C 0x0 0x0
+#define MX8MN_IOMUXC_I2C4_SDA_PWM1_OUT 0x230 0x498 0x000 0x1 0x0
+#define MX8MN_IOMUXC_I2C4_SDA_ECSPI2_SS0 0x230 0x498 0x570 0x3 0x1
+#define MX8MN_IOMUXC_I2C4_SDA_GPIO5_IO21 0x230 0x498 0x000 0x5 0x0
+#define MX8MN_IOMUXC_UART1_RXD_UART1_DCE_RX 0x234 0x49C 0x4F4 0x0 0x0
+#define MX8MN_IOMUXC_UART1_RXD_UART1_DTE_TX 0x234 0x49C 0x000 0x0 0x0
+#define MX8MN_IOMUXC_UART1_RXD_ECSPI3_SCLK 0x234 0x49C 0x000 0x1 0x0
+#define
MX8MN_IOMUXC_UART1_RXD_GPIO5_IO22 0x234 0x49C 0x000 0x5 0x0 +#define MX8MN_IOMUXC_UART1_TXD_UART1_DCE_TX 0x238 0x4A0 0x000 0x0 0x0 +#define MX8MN_IOMUXC_UART1_TXD_UART1_DTE_RX 0x238 0x4A0 0x4F4 0x0 0x1 +#define MX8MN_IOMUXC_UART1_TXD_ECSPI3_MOSI 0x238 0x4A0 0x000 0x1 0x0 +#define MX8MN_IOMUXC_UART1_TXD_GPIO5_IO23 0x238 0x4A0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_UART2_RXD_UART2_DCE_RX 0x23C 0x4A4 0x4FC 0x0 0x0 +#define MX8MN_IOMUXC_UART2_RXD_UART2_DTE_TX 0x23C 0x4A4 0x000 0x0 0x0 +#define MX8MN_IOMUXC_UART2_RXD_ECSPI3_MISO 0x23C 0x4A4 0x000 0x1 0x0 +#define MX8MN_IOMUXC_UART2_RXD_GPT1_COMPARE3 0x23C 0x4A4 0x000 0x3 0x0 +#define MX8MN_IOMUXC_UART2_RXD_GPIO5_IO24 0x23C 0x4A4 0x000 0x5 0x0 +#define MX8MN_IOMUXC_UART2_TXD_UART2_DCE_TX 0x240 0x4A8 0x000 0x0 0x0 +#define MX8MN_IOMUXC_UART2_TXD_UART2_DTE_RX 0x240 0x4A8 0x4FC 0x0 0x1 +#define MX8MN_IOMUXC_UART2_TXD_ECSPI3_SS0 0x240 0x4A8 0x000 0x1 0x0 +#define MX8MN_IOMUXC_UART2_TXD_GPT1_COMPARE2 0x240 0x4A8 0x000 0x3 0x0 +#define MX8MN_IOMUXC_UART2_TXD_GPIO5_IO25 0x240 0x4A8 0x000 0x5 0x0 +#define MX8MN_IOMUXC_UART3_RXD_UART3_DCE_RX 0x244 0x4AC 0x504 0x0 0x2 +#define MX8MN_IOMUXC_UART3_RXD_UART3_DTE_TX 0x244 0x4AC 0x000 0x0 0x0 +#define MX8MN_IOMUXC_UART3_RXD_UART1_DCE_CTS_B 0x244 0x4AC 0x000 0x1 0x0 +#define MX8MN_IOMUXC_UART3_RXD_UART1_DTE_RTS_B 0x244 0x4AC 0x4F0 0x1 0x0 +#define MX8MN_IOMUXC_UART3_RXD_USDHC3_RESET_B 0x244 0x4AC 0x000 0x2 0x0 +#define MX8MN_IOMUXC_UART3_RXD_GPT1_CAPTURE2 0x244 0x4AC 0x5EC 0x3 0x1 +#define MX8MN_IOMUXC_UART3_RXD_GPIO5_IO26 0x244 0x4AC 0x000 0x5 0x0 +#define MX8MN_IOMUXC_UART3_TXD_UART3_DCE_TX 0x248 0x4B0 0x000 0x0 0x0 +#define MX8MN_IOMUXC_UART3_TXD_UART3_DTE_RX 0x248 0x4B0 0x504 0x0 0x3 +#define MX8MN_IOMUXC_UART3_TXD_UART1_DCE_RTS_B 0x248 0x4B0 0x4F0 0x1 0x1 +#define MX8MN_IOMUXC_UART3_TXD_UART1_DTE_CTS_B 0x248 0x4B0 0x000 0x1 0x0 +#define MX8MN_IOMUXC_UART3_TXD_USDHC3_VSELECT 0x248 0x4B0 0x000 0x2 0x0 +#define MX8MN_IOMUXC_UART3_TXD_GPT1_CLK 0x248 0x4B0 0x5E8 0x3 0x1 +#define MX8MN_IOMUXC_UART3_TXD_GPIO5_IO27 0x248 0x4B0 0x000 0x5 0x0 +#define MX8MN_IOMUXC_UART4_RXD_UART4_DCE_RX 0x24C 0x4B4 0x50C 0x0 0x2 +#define MX8MN_IOMUXC_UART4_RXD_UART4_DTE_TX 0x24C 0x4B4 0x000 0x0 0x0 +#define MX8MN_IOMUXC_UART4_RXD_UART2_DCE_CTS_B 0x24C 0x4B4 0x000 0x1 0x0 +#define MX8MN_IOMUXC_UART4_RXD_UART2_DTE_RTS_B 0x24C 0x4B4 0x4F8 0x1 0x0 +#define MX8MN_IOMUXC_UART4_RXD_GPT1_COMPARE1 0x24C 0x4B4 0x000 0x3 0x0 +#define MX8MN_IOMUXC_UART4_RXD_GPIO5_IO28 0x24C 0x4B4 0x000 0x5 0x0 +#define MX8MN_IOMUXC_UART4_TXD_UART4_DCE_TX 0x250 0x4B8 0x000 0x0 0x0 +#define MX8MN_IOMUXC_UART4_TXD_UART4_DTE_RX 0x250 0x4B8 0x50C 0x0 0x3 +#define MX8MN_IOMUXC_UART4_TXD_UART2_DCE_RTS_B 0x250 0x4B8 0x4F8 0x1 0x1 +#define MX8MN_IOMUXC_UART4_TXD_UART2_DTE_CTS_B 0x250 0x4B8 0x000 0x1 0x0 +#define MX8MN_IOMUXC_UART4_TXD_GPT1_CAPTURE1 0x250 0x4B8 0x5F0 0x3 0x1 +#define MX8MN_IOMUXC_UART4_TXD_GPIO5_IO29 0x250 0x4B8 0x000 0x5 0x0 + +#endif /* __DTS_IMX8MN_PINFUNC_H */ diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 4b1f5ae710eb..d1e13d340e26 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -929,7 +929,8 @@ sgmiisys: sgmiisys@1b128000 { compatible = "mediatek,mt7622-sgmiisys", "syscon"; - reg = <0 0x1b128000 0 0x1000>; + reg = <0 0x1b128000 0 0x3000>; #clock-cells = <1>; + mediatek,physpeed = "2500"; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi index f09f3e03f708..108667ce4f31 100644 --- 
a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi @@ -27,6 +27,23 @@ status = "okay"; }; +&pm8005_lsid1 { + pm8005-regulators { + compatible = "qcom,pm8005-regulators"; + + vdd_s1-supply = <&vph_pwr>; + + pm8005_s1: s1 { /* VDD_GFX supply */ + regulator-min-microvolt = <524000>; + regulator-max-microvolt = <1100000>; + regulator-enable-ramp-delay = <500>; + + /* hack until we rig up the gpu consumer */ + regulator-always-on; + }; + }; +}; + &qusb2phy { status = "okay"; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 4d583514258c..dd827e64e5fe 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -68,6 +68,7 @@ CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_XEN=y CONFIG_COMPAT=y +CONFIG_RANDOMIZE_BASE=y CONFIG_HIBERNATION=y CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y CONFIG_ARM_CPUIDLE=y @@ -613,6 +614,7 @@ CONFIG_RTC_DRV_TEGRA=y CONFIG_RTC_DRV_IMX_SC=m CONFIG_RTC_DRV_XGENE=y CONFIG_DMADEVICES=y +CONFIG_FSL_EDMA=y CONFIG_DMA_BCM2835=m CONFIG_K3_DMA=y CONFIG_MV_XOR=y diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 3ebfaec97e27..00bd2885feaa 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -15,6 +15,8 @@ .arch armv8-a+crypto xtsmask .req v16 + cbciv .req v16 + vctr .req v16 .macro xts_reload_mask, tmp .endm @@ -49,7 +51,7 @@ load_round_keys \rounds, \temp .endm - .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 + .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4 aes\de \i0\().16b, \k\().16b aes\mc \i0\().16b, \i0\().16b .ifnb \i1 @@ -60,27 +62,34 @@ aes\mc \i2\().16b, \i2\().16b aes\de \i3\().16b, \k\().16b aes\mc \i3\().16b, \i3\().16b + .ifnb \i4 + aes\de \i4\().16b, \k\().16b + aes\mc \i4\().16b, \i4\().16b + .endif .endif .endif .endm - /* up to 4 interleaved encryption rounds with the same round key */ - .macro round_Nx, enc, k, i0, i1, i2, i3 + /* up to 5 interleaved encryption rounds with the same round key */ + .macro round_Nx, enc, k, i0, i1, i2, i3, i4 .ifc \enc, e - do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3 + do_enc_Nx e, mc, \k, \i0, \i1, \i2, \i3, \i4 .else - do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3 + do_enc_Nx d, imc, \k, \i0, \i1, \i2, \i3, \i4 .endif .endm - /* up to 4 interleaved final rounds */ - .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3 + /* up to 5 interleaved final rounds */ + .macro fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4 aes\de \i0\().16b, \k\().16b .ifnb \i1 aes\de \i1\().16b, \k\().16b .ifnb \i3 aes\de \i2\().16b, \k\().16b aes\de \i3\().16b, \k\().16b + .ifnb \i4 + aes\de \i4\().16b, \k\().16b + .endif .endif .endif eor \i0\().16b, \i0\().16b, \k2\().16b @@ -89,47 +98,52 @@ .ifnb \i3 eor \i2\().16b, \i2\().16b, \k2\().16b eor \i3\().16b, \i3\().16b, \k2\().16b + .ifnb \i4 + eor \i4\().16b, \i4\().16b, \k2\().16b + .endif .endif .endif .endm - /* up to 4 interleaved blocks */ - .macro do_block_Nx, enc, rounds, i0, i1, i2, i3 + /* up to 5 interleaved blocks */ + .macro do_block_Nx, enc, rounds, i0, i1, i2, i3, i4 cmp \rounds, #12 blo 2222f /* 128 bits */ beq 1111f /* 192 bits */ - round_Nx \enc, v17, \i0, \i1, \i2, \i3 - round_Nx \enc, v18, \i0, \i1, \i2, \i3 -1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3 - round_Nx \enc, v20, \i0, \i1, \i2, \i3 + round_Nx \enc, v17, \i0, \i1, \i2, \i3, \i4 + round_Nx \enc, v18, \i0, \i1, \i2, \i3, \i4 +1111: round_Nx \enc, v19, \i0, \i1, \i2, \i3, \i4 + round_Nx \enc, v20, \i0, \i1, \i2, \i3, \i4 2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29 - round_Nx \enc, \key, \i0, \i1, \i2, \i3 + 
round_Nx \enc, \key, \i0, \i1, \i2, \i3, \i4 .endr - fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3 + fin_round_Nx \enc, v30, v31, \i0, \i1, \i2, \i3, \i4 .endm .macro encrypt_block, in, rounds, t0, t1, t2 do_block_Nx e, \rounds, \in .endm - .macro encrypt_block2x, i0, i1, rounds, t0, t1, t2 - do_block_Nx e, \rounds, \i0, \i1 - .endm - .macro encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 do_block_Nx e, \rounds, \i0, \i1, \i2, \i3 .endm - .macro decrypt_block, in, rounds, t0, t1, t2 - do_block_Nx d, \rounds, \in + .macro encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2 + do_block_Nx e, \rounds, \i0, \i1, \i2, \i3, \i4 .endm - .macro decrypt_block2x, i0, i1, rounds, t0, t1, t2 - do_block_Nx d, \rounds, \i0, \i1 + .macro decrypt_block, in, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \in .endm .macro decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2 do_block_Nx d, \rounds, \i0, \i1, \i2, \i3 .endm + .macro decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2 + do_block_Nx d, \rounds, \i0, \i1, \i2, \i3, \i4 + .endm + +#define MAX_STRIDE 5 + #include "aes-modes.S" diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 2883def14be5..324039b72094 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -10,6 +10,18 @@ .text .align 4 +#ifndef MAX_STRIDE +#define MAX_STRIDE 4 +#endif + +#if MAX_STRIDE == 4 +#define ST4(x...) x +#define ST5(x...) +#else +#define ST4(x...) +#define ST5(x...) x +#endif + aes_encrypt_block4x: encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 ret @@ -20,6 +32,18 @@ aes_decrypt_block4x: ret ENDPROC(aes_decrypt_block4x) +#if MAX_STRIDE == 5 +aes_encrypt_block5x: + encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 + ret +ENDPROC(aes_encrypt_block5x) + +aes_decrypt_block5x: + decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 + ret +ENDPROC(aes_decrypt_block5x) +#endif + /* * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks) @@ -34,14 +58,17 @@ AES_ENTRY(aes_ecb_encrypt) enc_prepare w3, x2, x5 .LecbencloopNx: - subs w4, w4, #4 + subs w4, w4, #MAX_STRIDE bmi .Lecbenc1x ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ - bl aes_encrypt_block4x +ST4( bl aes_encrypt_block4x ) +ST5( ld1 {v4.16b}, [x1], #16 ) +ST5( bl aes_encrypt_block5x ) st1 {v0.16b-v3.16b}, [x0], #64 +ST5( st1 {v4.16b}, [x0], #16 ) b .LecbencloopNx .Lecbenc1x: - adds w4, w4, #4 + adds w4, w4, #MAX_STRIDE beq .Lecbencout .Lecbencloop: ld1 {v0.16b}, [x1], #16 /* get next pt block */ @@ -62,14 +89,17 @@ AES_ENTRY(aes_ecb_decrypt) dec_prepare w3, x2, x5 .LecbdecloopNx: - subs w4, w4, #4 + subs w4, w4, #MAX_STRIDE bmi .Lecbdec1x ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ - bl aes_decrypt_block4x +ST4( bl aes_decrypt_block4x ) +ST5( ld1 {v4.16b}, [x1], #16 ) +ST5( bl aes_decrypt_block5x ) st1 {v0.16b-v3.16b}, [x0], #64 +ST5( st1 {v4.16b}, [x0], #16 ) b .LecbdecloopNx .Lecbdec1x: - adds w4, w4, #4 + adds w4, w4, #MAX_STRIDE beq .Lecbdecout .Lecbdecloop: ld1 {v0.16b}, [x1], #16 /* get next ct block */ @@ -129,39 +159,56 @@ AES_ENTRY(aes_cbc_decrypt) stp x29, x30, [sp, #-16]! 
 mov x29, sp
- ld1 {v7.16b}, [x5] /* get iv */
+ ld1 {cbciv.16b}, [x5] /* get iv */
 dec_prepare w3, x2, x6
 .LcbcdecloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
 bmi .Lcbcdec1x
 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+#if MAX_STRIDE == 5
+ ld1 {v4.16b}, [x1], #16 /* get 1 ct block */
+ mov v5.16b, v0.16b
+ mov v6.16b, v1.16b
+ mov v7.16b, v2.16b
+ bl aes_decrypt_block5x
+ sub x1, x1, #32
+ eor v0.16b, v0.16b, cbciv.16b
+ eor v1.16b, v1.16b, v5.16b
+ ld1 {v5.16b}, [x1], #16 /* reload 1 ct block */
+ ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
+ eor v2.16b, v2.16b, v6.16b
+ eor v3.16b, v3.16b, v7.16b
+ eor v4.16b, v4.16b, v5.16b
+#else
 mov v4.16b, v0.16b
 mov v5.16b, v1.16b
 mov v6.16b, v2.16b
 bl aes_decrypt_block4x
 sub x1, x1, #16
- eor v0.16b, v0.16b, v7.16b
+ eor v0.16b, v0.16b, cbciv.16b
 eor v1.16b, v1.16b, v4.16b
- ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
+ ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
 eor v2.16b, v2.16b, v5.16b
 eor v3.16b, v3.16b, v6.16b
+#endif
 st1 {v0.16b-v3.16b}, [x0], #64
+ST5( st1 {v4.16b}, [x0], #16 )
 b .LcbcdecloopNx
 .Lcbcdec1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
 beq .Lcbcdecout
 .Lcbcdecloop:
 ld1 {v1.16b}, [x1], #16 /* get next ct block */
 mov v0.16b, v1.16b /* ...and copy to v0 */
 decrypt_block v0, w3, x2, x6, w7
- eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
- mov v7.16b, v1.16b /* ct is next iv */
+ eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */
+ mov cbciv.16b, v1.16b /* ct is next iv */
 st1 {v0.16b}, [x0], #16
 subs w4, w4, #1
 bne .Lcbcdecloop
 .Lcbcdecout:
- st1 {v7.16b}, [x5] /* return iv */
+ st1 {cbciv.16b}, [x5] /* return iv */
 ldp x29, x30, [sp], #16
 ret
 AES_ENDPROC(aes_cbc_decrypt)
@@ -255,51 +302,60 @@ AES_ENTRY(aes_ctr_encrypt)
 mov x29, sp
 enc_prepare w3, x2, x6
- ld1 {v4.16b}, [x5]
+ ld1 {vctr.16b}, [x5]
- umov x6, v4.d[1] /* keep swabbed ctr in reg */
+ umov x6, vctr.d[1] /* keep swabbed ctr in reg */
 rev x6, x6
 cmn w6, w4 /* 32 bit overflow? */
 bcs .Lctrloop
 .LctrloopNx:
- subs w4, w4, #4
+ subs w4, w4, #MAX_STRIDE
 bmi .Lctr1x
 add w7, w6, #1
- mov v0.16b, v4.16b
+ mov v0.16b, vctr.16b
 add w8, w6, #2
- mov v1.16b, v4.16b
+ mov v1.16b, vctr.16b
+ add w9, w6, #3
+ mov v2.16b, vctr.16b
 add w9, w6, #3
- mov v2.16b, v4.16b
 rev w7, w7
- mov v3.16b, v4.16b
+ mov v3.16b, vctr.16b
 rev w8, w8
+ST5( mov v4.16b, vctr.16b )
 mov v1.s[3], w7
 rev w9, w9
+ST5( add w10, w6, #4 )
 mov v2.s[3], w8
+ST5( rev w10, w10 )
 mov v3.s[3], w9
+ST5( mov v4.s[3], w10 )
 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
- bl aes_encrypt_block4x
+ST4( bl aes_encrypt_block4x )
+ST5( bl aes_encrypt_block5x )
 eor v0.16b, v5.16b, v0.16b
- ld1 {v5.16b}, [x1], #16 /* get 1 input block */
+ST4( ld1 {v5.16b}, [x1], #16 )
 eor v1.16b, v6.16b, v1.16b
+ST5( ld1 {v5.16b-v6.16b}, [x1], #32 )
 eor v2.16b, v7.16b, v2.16b
 eor v3.16b, v5.16b, v3.16b
+ST5( eor v4.16b, v6.16b, v4.16b )
 st1 {v0.16b-v3.16b}, [x0], #64
- add x6, x6, #4
+ST5( st1 {v4.16b}, [x0], #16 )
+ add x6, x6, #MAX_STRIDE
 rev x7, x6
- ins v4.d[1], x7
+ ins vctr.d[1], x7
 cbz w4, .Lctrout
 b .LctrloopNx
 .Lctr1x:
- adds w4, w4, #4
+ adds w4, w4, #MAX_STRIDE
 beq .Lctrout
 .Lctrloop:
- mov v0.16b, v4.16b
+ mov v0.16b, vctr.16b
 encrypt_block v0, w3, x2, x8, w7
 adds x6, x6, #1 /* increment BE ctr */
 rev x7, x6
- ins v4.d[1], x7
+ ins vctr.d[1], x7
 bcs .Lctrcarry /* overflow?
*/ .Lctrcarrydone: @@ -311,7 +367,7 @@ AES_ENTRY(aes_ctr_encrypt) bne .Lctrloop .Lctrout: - st1 {v4.16b}, [x5] /* return next CTR value */ + st1 {vctr.16b}, [x5] /* return next CTR value */ ldp x29, x30, [sp], #16 ret @@ -320,11 +376,11 @@ AES_ENTRY(aes_ctr_encrypt) b .Lctrout .Lctrcarry: - umov x7, v4.d[0] /* load upper word of ctr */ + umov x7, vctr.d[0] /* load upper word of ctr */ rev x7, x7 /* ... to handle the carry */ add x7, x7, #1 rev x7, x7 - ins v4.d[0], x7 + ins vctr.d[0], x7 b .Lctrcarrydone AES_ENDPROC(aes_ctr_encrypt) diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index d261331747f2..2bebccc73869 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -12,6 +12,8 @@ #define AES_ENDPROC(func) ENDPROC(neon_ ## func) xtsmask .req v7 + cbciv .req v7 + vctr .req v4 .macro xts_reload_mask, tmp xts_load_mask \tmp @@ -114,26 +116,9 @@ /* * Interleaved versions: functionally equivalent to the - * ones above, but applied to 2 or 4 AES states in parallel. + * ones above, but applied to AES states in parallel. */ - .macro sub_bytes_2x, in0, in1 - sub v8.16b, \in0\().16b, v15.16b - tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b - sub v9.16b, \in1\().16b, v15.16b - tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b - sub v10.16b, v8.16b, v15.16b - tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b - sub v11.16b, v9.16b, v15.16b - tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b - sub v8.16b, v10.16b, v15.16b - tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b - sub v9.16b, v11.16b, v15.16b - tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b - tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b - tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b - .endm - .macro sub_bytes_4x, in0, in1, in2, in3 sub v8.16b, \in0\().16b, v15.16b tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b @@ -212,25 +197,6 @@ eor \in1\().16b, \in1\().16b, v11.16b .endm - .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i - ld1 {v15.4s}, [\rk] - add \rkp, \rk, #16 - mov \i, \rounds -1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ - eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ - movi v15.16b, #0x40 - tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ - tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ - sub_bytes_2x \in0, \in1 - subs \i, \i, #1 - ld1 {v15.4s}, [\rkp], #16 - beq 2222f - mix_columns_2x \in0, \in1, \enc - b 1111b -2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ - eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */ - .endm - .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 @@ -257,14 +223,6 @@ eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */ .endm - .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i - do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i - .endm - - .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i - do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i - .endm - .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i .endm diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index 82029cda2e77..1495d2b18518 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -60,7 +60,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, } static int chacha_neon_stream_xor(struct skcipher_request *req, - struct chacha_ctx *ctx, u8 *iv) + const struct chacha_ctx *ctx, const u8 *iv) { struct skcipher_walk walk; u32 state[16]; diff --git 
a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index ecb0f67e5998..bdc1b6d7aff7 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -52,7 +52,7 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct sha1_ce_state *sctx = shash_desc_ctx(desc); - bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); + bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE) && len; if (!crypto_simd_usable()) return crypto_sha1_finup(desc, data, len, out); diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 955c3c2d3f5a..604a01a4ede6 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -57,7 +57,7 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct sha256_ce_state *sctx = shash_desc_ctx(desc); - bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); + bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE) && len; if (!crypto_simd_usable()) { if (len) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index ada0bc480a1b..b263e239cb59 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -38,6 +38,9 @@ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) +#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ + spe_interrupt) + sizeof(u16)) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 2247908e55d6..79155a8cfe7c 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -152,7 +152,9 @@ static inline bool gic_prio_masking_enabled(void) static inline void gic_pmr_mask_irqs(void) { - BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF); + BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF | + GIC_PRIO_PSR_I_SET)); + BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); gic_write_pmr(GIC_PRIO_IRQOFF); } diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 6756178c27db..7ae54d7d333a 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -9,6 +9,7 @@ #define __ASM_ARCH_TIMER_H #include <asm/barrier.h> +#include <asm/hwcap.h> #include <asm/sysreg.h> #include <linux/bug.h> @@ -229,4 +230,16 @@ static inline int arch_timer_arch_init(void) return 0; } +static inline void arch_timer_set_evtstrm_feature(void) +{ + cpu_set_named_feature(EVTSTRM); +#ifdef CONFIG_COMPAT + compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; +#endif +} + +static inline bool arch_timer_have_evtstrm_feature(void) +{ + return cpu_have_named_feature(EVTSTRM); +} #endif diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 570d195a184d..e3a15c751b13 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -96,7 +96,11 @@ * RAS Error Synchronization barrier */ .macro esb +#ifdef CONFIG_ARM64_RAS_EXTN hint #16 +#else + nop +#endif .endm /* diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 23c378606aed..c8c850bc3dfb 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -122,9 +122,9 @@ ATOMIC_OPS(xor, eor) #define ATOMIC64_OP(op, asm_op) \ 
__LL_SC_INLINE void \ -__LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \ +__LL_SC_PREFIX(arch_atomic64_##op(s64 i, atomic64_t *v)) \ { \ - long result; \ + s64 result; \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "\n" \ @@ -139,10 +139,10 @@ __LL_SC_PREFIX(arch_atomic64_##op(long i, atomic64_t *v)) \ __LL_SC_EXPORT(arch_atomic64_##op); #define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ -__LL_SC_INLINE long \ -__LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\ +__LL_SC_INLINE s64 \ +__LL_SC_PREFIX(arch_atomic64_##op##_return##name(s64 i, atomic64_t *v))\ { \ - long result; \ + s64 result; \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "_return" #name "\n" \ @@ -161,10 +161,10 @@ __LL_SC_PREFIX(arch_atomic64_##op##_return##name(long i, atomic64_t *v))\ __LL_SC_EXPORT(arch_atomic64_##op##_return##name); #define ATOMIC64_FETCH_OP(name, mb, acq, rel, cl, op, asm_op) \ -__LL_SC_INLINE long \ -__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(long i, atomic64_t *v)) \ +__LL_SC_INLINE s64 \ +__LL_SC_PREFIX(arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v)) \ { \ - long result, val; \ + s64 result, val; \ unsigned long tmp; \ \ asm volatile("// atomic64_fetch_" #op #name "\n" \ @@ -214,10 +214,10 @@ ATOMIC64_OPS(xor, eor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -__LL_SC_INLINE long +__LL_SC_INLINE s64 __LL_SC_PREFIX(arch_atomic64_dec_if_positive(atomic64_t *v)) { - long result; + s64 result; unsigned long tmp; asm volatile("// atomic64_dec_if_positive\n" diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 45e030d54332..69acb1c19a15 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -213,9 +213,9 @@ ATOMIC_FETCH_OP_SUB( , al, "memory") #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(arch_atomic64_##op) #define ATOMIC64_OP(op, asm_op) \ -static inline void arch_atomic64_##op(long i, atomic64_t *v) \ +static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(op), \ @@ -233,9 +233,9 @@ ATOMIC64_OP(add, stadd) #undef ATOMIC64_OP #define ATOMIC64_FETCH_OP(name, mb, op, asm_op, cl...) \ -static inline long arch_atomic64_fetch_##op##name(long i, atomic64_t *v)\ +static inline s64 arch_atomic64_fetch_##op##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -265,9 +265,9 @@ ATOMIC64_FETCH_OPS(add, ldadd) #undef ATOMIC64_FETCH_OPS #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) 
\ -static inline long arch_atomic64_add_return##name(long i, atomic64_t *v)\ +static inline s64 arch_atomic64_add_return##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -291,9 +291,9 @@ ATOMIC64_OP_ADD_RETURN( , al, "memory") #undef ATOMIC64_OP_ADD_RETURN -static inline void arch_atomic64_and(long i, atomic64_t *v) +static inline void arch_atomic64_and(s64 i, atomic64_t *v) { - register long x0 asm ("x0") = i; + register s64 x0 asm ("x0") = i; register atomic64_t *x1 asm ("x1") = v; asm volatile(ARM64_LSE_ATOMIC_INSN( @@ -309,9 +309,9 @@ static inline void arch_atomic64_and(long i, atomic64_t *v) } #define ATOMIC64_FETCH_OP_AND(name, mb, cl...) \ -static inline long arch_atomic64_fetch_and##name(long i, atomic64_t *v) \ +static inline s64 arch_atomic64_fetch_and##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -335,9 +335,9 @@ ATOMIC64_FETCH_OP_AND( , al, "memory") #undef ATOMIC64_FETCH_OP_AND -static inline void arch_atomic64_sub(long i, atomic64_t *v) +static inline void arch_atomic64_sub(s64 i, atomic64_t *v) { - register long x0 asm ("x0") = i; + register s64 x0 asm ("x0") = i; register atomic64_t *x1 asm ("x1") = v; asm volatile(ARM64_LSE_ATOMIC_INSN( @@ -353,9 +353,9 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v) } #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ -static inline long arch_atomic64_sub_return##name(long i, atomic64_t *v)\ +static inline s64 arch_atomic64_sub_return##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -381,9 +381,9 @@ ATOMIC64_OP_SUB_RETURN( , al, "memory") #undef ATOMIC64_OP_SUB_RETURN #define ATOMIC64_FETCH_OP_SUB(name, mb, cl...) \ -static inline long arch_atomic64_fetch_sub##name(long i, atomic64_t *v) \ +static inline s64 arch_atomic64_fetch_sub##name(s64 i, atomic64_t *v) \ { \ - register long x0 asm ("x0") = i; \ + register s64 x0 asm ("x0") = i; \ register atomic64_t *x1 asm ("x1") = v; \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ @@ -407,7 +407,7 @@ ATOMIC64_FETCH_OP_SUB( , al, "memory") #undef ATOMIC64_FETCH_OP_SUB -static inline long arch_atomic64_dec_if_positive(atomic64_t *v) +static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { register long x0 asm ("x0") = (long)v; diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index a05db636981a..64eeaa41e7ca 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -80,12 +80,15 @@ static inline u32 cache_type_cwg(void) #define __read_mostly __attribute__((__section__(".data..read_mostly"))) -static inline int cache_line_size(void) +static inline int cache_line_size_of_cpu(void) { u32 cwg = cache_type_cwg(); + return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } +int cache_line_size(void); + /* * Read the effective value of CTR_EL0. 
* diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 1fe4467442aa..665c78e0665a 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -176,4 +176,7 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end) int set_memory_valid(unsigned long addr, int numpages, int enable); +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); + #endif diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 373799b7982f..407e2bf23676 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -614,6 +614,18 @@ static inline bool system_uses_irq_prio_masking(void) cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING); } +static inline bool system_has_prio_mask_debugging(void) +{ + return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) && + system_uses_irq_prio_masking(); +} + +#define ARM64_BP_HARDEN_UNKNOWN -1 +#define ARM64_BP_HARDEN_WA_NEEDED 0 +#define ARM64_BP_HARDEN_NOT_REQUIRED 1 + +int get_spectre_v2_workaround_state(void); + #define ARM64_SSBD_UNKNOWN -1 #define ARM64_SSBD_FORCE_DISABLE 0 #define ARM64_SSBD_KERNEL 1 diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 6dd8a8723525..987926ed535e 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -7,6 +7,7 @@ #include <linux/irqflags.h> +#include <asm/arch_gicv3.h> #include <asm/cpufeature.h> #define DAIF_PROCCTX 0 @@ -16,11 +17,20 @@ /* mask/save/unmask/restore all exceptions, including interrupts. */ static inline void local_daif_mask(void) { + WARN_ON(system_has_prio_mask_debugging() && + (read_sysreg_s(SYS_ICC_PMR_EL1) == (GIC_PRIO_IRQOFF | + GIC_PRIO_PSR_I_SET))); + asm volatile( "msr daifset, #0xf // local_daif_mask\n" : : : "memory"); + + /* Don't really care for a dsb here, we don't intend to enable IRQs */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + trace_hardirqs_off(); } @@ -32,7 +42,7 @@ static inline unsigned long local_daif_save(void) if (system_uses_irq_prio_masking()) { /* If IRQs are masked with PMR, reflect it in the flags */ - if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF) + if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) flags |= PSR_I_BIT; } @@ -45,39 +55,50 @@ static inline void local_daif_restore(unsigned long flags) { bool irq_disabled = flags & PSR_I_BIT; + WARN_ON(system_has_prio_mask_debugging() && + !(read_sysreg(daif) & PSR_I_BIT)); + if (!irq_disabled) { trace_hardirqs_on(); - if (system_uses_irq_prio_masking()) - arch_local_irq_enable(); - } else if (!(flags & PSR_A_BIT)) { - /* - * If interrupts are disabled but we can take - * asynchronous errors, we can take NMIs - */ if (system_uses_irq_prio_masking()) { - flags &= ~PSR_I_BIT; + gic_write_pmr(GIC_PRIO_IRQON); + dsb(sy); + } + } else if (system_uses_irq_prio_masking()) { + u64 pmr; + + if (!(flags & PSR_A_BIT)) { /* - * There has been concern that the write to daif - * might be reordered before this write to PMR. - * From the ARM ARM DDI 0487D.a, section D1.7.1 - * "Accessing PSTATE fields": - * Writes to the PSTATE fields have side-effects on - * various aspects of the PE operation. All of these - * side-effects are guaranteed: - * - Not to be visible to earlier instructions in - * the execution stream. 
- * - To be visible to later instructions in the - * execution stream - * - * Also, writes to PMR are self-synchronizing, so no - * interrupts with a lower priority than PMR is signaled - * to the PE after the write. - * - * So we don't need additional synchronization here. + * If interrupts are disabled but we can take + * asynchronous errors, we can take NMIs */ - arch_local_irq_disable(); + flags &= ~PSR_I_BIT; + pmr = GIC_PRIO_IRQOFF; + } else { + pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; } + + /* + * There has been concern that the write to daif + * might be reordered before this write to PMR. + * From the ARM ARM DDI 0487D.a, section D1.7.1 + * "Accessing PSTATE fields": + * Writes to the PSTATE fields have side-effects on + * various aspects of the PE operation. All of these + * side-effects are guaranteed: + * - Not to be visible to earlier instructions in + * the execution stream. + * - To be visible to later instructions in the + * execution stream + * + * Also, writes to PMR are self-synchronizing, so no + * interrupts with a lower priority than PMR is signaled + * to the PE after the write. + * + * So we don't need additional synchronization here. + */ + gic_write_pmr(pmr); } write_sysreg(flags, daif); diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index c9e9a6978e73..8e79ce9c3f5c 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -83,7 +83,7 @@ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) * guaranteed to cover the kernel Image. * * Since the EFI stub is part of the kernel Image, we can relax the - * usual requirements in Documentation/arm64/booting.txt, which still + * usual requirements in Documentation/arm64/booting.rst, which still * apply to other bootloaders, and are required for some kernel * configurations. */ diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 325d9515c0f8..3c7037c6ba9b 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -202,7 +202,21 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; ({ \ set_thread_flag(TIF_32BIT); \ }) +#ifdef CONFIG_GENERIC_COMPAT_VDSO +#define COMPAT_ARCH_DLINFO \ +do { \ + /* \ + * Note that we use Elf64_Off instead of elf_addr_t because \ + * elf_addr_t in compat is defined as Elf32_Addr and casting \ + * current->mm->context.vdso to it triggers a cast warning of \ + * cast from pointer to integer of different size. 
\ + */ \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (Elf64_Off)current->mm->context.vdso); \ +} while (0) +#else #define COMPAT_ARCH_DLINFO +#endif extern int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); #define compat_arch_setup_additional_pages \ diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 897029c8e9b5..b6a2c352f4c3 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -37,8 +37,6 @@ struct task_struct; extern void fpsimd_save_state(struct user_fpsimd_state *state); extern void fpsimd_load_state(struct user_fpsimd_state *state); -extern void fpsimd_save(void); - extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); @@ -52,8 +50,7 @@ extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state, void *sve_state, unsigned int sve_vl); extern void fpsimd_flush_task_state(struct task_struct *target); -extern void fpsimd_flush_cpu_state(void); -extern void sve_flush_cpu_state(void); +extern void fpsimd_save_and_flush_cpu_state(void); /* Maximum VL that SVE VL-agnostic software can transparently support */ #define SVE_VL_ARCH_MAX 0x100 diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index e5d9420cd258..3d2f2472a36c 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -84,6 +84,8 @@ #define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM) #define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3) #define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4) +#define KERNEL_HWCAP_FLAGM2 __khwcap2_feature(FLAGM2) +#define KERNEL_HWCAP_FRINT __khwcap2_feature(FRINT) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h index e2c27a2278e9..c2b13213c720 100644 --- a/arch/arm64/include/asm/image.h +++ b/arch/arm64/include/asm/image.h @@ -27,7 +27,7 @@ /* * struct arm64_image_header - arm64 kernel image header - * See Documentation/arm64/booting.txt for details + * See Documentation/arm64/booting.rst for details * * @code0: Executable code, or * @mz_header alternatively used for part of MZ header diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 66853fde60f9..7872f260c9ee 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -29,6 +29,12 @@ */ static inline void arch_local_irq_enable(void) { + if (system_has_prio_mask_debugging()) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + + WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); + } + asm volatile(ALTERNATIVE( "msr daifclr, #2 // arch_local_irq_enable\n" "nop", @@ -42,6 +48,12 @@ static inline void arch_local_irq_enable(void) static inline void arch_local_irq_disable(void) { + if (system_has_prio_mask_debugging()) { + u32 pmr = read_sysreg_s(SYS_ICC_PMR_EL1); + + WARN_ON_ONCE(pmr != GIC_PRIO_IRQON && pmr != GIC_PRIO_IRQOFF); + } + asm volatile(ALTERNATIVE( "msr daifset, #2 // arch_local_irq_disable", __msr_s(SYS_ICC_PMR_EL1, "%0"), @@ -56,43 +68,46 @@ static inline void arch_local_irq_disable(void) */ static inline unsigned long arch_local_save_flags(void) { - unsigned long daif_bits; unsigned long flags; - daif_bits = read_sysreg(daif); - - /* - * The asm is logically equivalent to: - * - * if (system_uses_irq_prio_masking()) - * flags = (daif_bits & PSR_I_BIT) ? 
- * GIC_PRIO_IRQOFF : - * read_sysreg_s(SYS_ICC_PMR_EL1); - * else - * flags = daif_bits; - */ asm volatile(ALTERNATIVE( - "mov %0, %1\n" - "nop\n" - "nop", - __mrs_s("%0", SYS_ICC_PMR_EL1) - "ands %1, %1, " __stringify(PSR_I_BIT) "\n" - "csel %0, %0, %2, eq", - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (flags), "+r" (daif_bits) - : "r" ((unsigned long) GIC_PRIO_IRQOFF) + "mrs %0, daif", + __mrs_s("%0", SYS_ICC_PMR_EL1), + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (flags) + : : "memory"); return flags; } +static inline int arch_irqs_disabled_flags(unsigned long flags) +{ + int res; + + asm volatile(ALTERNATIVE( + "and %w0, %w1, #" __stringify(PSR_I_BIT), + "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQON), + ARM64_HAS_IRQ_PRIO_MASKING) + : "=&r" (res) + : "r" ((int) flags) + : "memory"); + + return res; +} + static inline unsigned long arch_local_irq_save(void) { unsigned long flags; flags = arch_local_save_flags(); - arch_local_irq_disable(); + /* + * There are too many states with IRQs disabled, just keep the current + * state if interrupts are already disabled/masked. + */ + if (!arch_irqs_disabled_flags(flags)) + arch_local_irq_disable(); return flags; } @@ -108,26 +123,10 @@ static inline void arch_local_irq_restore(unsigned long flags) __msr_s(SYS_ICC_PMR_EL1, "%0") "dsb sy", ARM64_HAS_IRQ_PRIO_MASKING) - : "+r" (flags) : + : "r" (flags) : "memory"); } -static inline int arch_irqs_disabled_flags(unsigned long flags) -{ - int res; - - asm volatile(ALTERNATIVE( - "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n" - "nop", - "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n" - "cset %w0, ls", - ARM64_HAS_IRQ_PRIO_MASKING) - : "=&r" (res) - : "r" ((int) flags) - : "memory"); - - return res; -} #endif #endif diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 2ca437ef59fa..44a243754c1b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -30,6 +30,12 @@ {ARM_EXCEPTION_TRAP, "TRAP" }, \ {ARM_EXCEPTION_HYP_GONE, "HYP_GONE" } +/* + * Size of the HYP vectors preamble. kvm_patch_vector_branch() generates code + * that jumps over this. 
+ */ +#define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE) + #ifndef __ASSEMBLY__ #include <linux/mm.h> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 034dadec7168..d69c1efc63e7 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -126,7 +126,7 @@ static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu) static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) { if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(elr); + return read_sysreg_el1(SYS_ELR); else return *__vcpu_elr_el1(vcpu); } @@ -134,7 +134,7 @@ static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu) static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v) { if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, elr); + write_sysreg_el1(v, SYS_ELR); else *__vcpu_elr_el1(vcpu) = v; } @@ -186,7 +186,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) return vcpu_read_spsr32(vcpu); if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(spsr); + return read_sysreg_el1(SYS_SPSR); else return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1]; } @@ -199,7 +199,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) } if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, spsr); + write_sysreg_el1(v, SYS_SPSR); else vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v; } @@ -353,6 +353,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } +static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG; +} + +static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu, + bool flag) +{ + if (flag) + vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; + else + vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG; +} + static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { @@ -451,13 +465,13 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) */ static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu) { - *vcpu_pc(vcpu) = read_sysreg_el2(elr); - vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr); + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr); - write_sysreg_el2(*vcpu_pc(vcpu), elr); + write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index c328191aa202..f656169db8c3 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -19,12 +19,12 @@ #include <asm/arch_gicv3.h> #include <asm/barrier.h> #include <asm/cpufeature.h> +#include <asm/cputype.h> #include <asm/daifflags.h> #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> -#include <asm/smp_plat.h> #include <asm/thread_info.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -484,11 +484,10 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data); -static inline void kvm_init_host_cpu_context(struct 
kvm_cpu_context *cpu_ctxt, - int cpu) +static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { /* The host's MPIDR is immutable, so let's set it up at boot time */ - cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu); + cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); } void __kvm_enable_ssbs(void); @@ -597,11 +596,12 @@ static inline void kvm_arm_vhe_guest_enter(void) * will not signal the CPU of interrupts of lower priority, and the * only way to get out will be via guest exceptions. * Naturally, we want to avoid this. + * + * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. */ - if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON); + if (system_uses_irq_prio_masking()) dsb(sy); - } } static inline void kvm_arm_vhe_guest_exit(void) @@ -620,9 +620,21 @@ static inline void kvm_arm_vhe_guest_exit(void) isb(); } -static inline bool kvm_arm_harden_branch_predictor(void) +#define KVM_BP_HARDEN_UNKNOWN -1 +#define KVM_BP_HARDEN_WA_NEEDED 0 +#define KVM_BP_HARDEN_NOT_REQUIRED 1 + +static inline int kvm_arm_harden_branch_predictor(void) { - return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); + switch (get_spectre_v2_workaround_state()) { + case ARM64_BP_HARDEN_WA_NEEDED: + return KVM_BP_HARDEN_WA_NEEDED; + case ARM64_BP_HARDEN_NOT_REQUIRED: + return KVM_BP_HARDEN_NOT_REQUIRED; + case ARM64_BP_HARDEN_UNKNOWN: + default: + return KVM_BP_HARDEN_UNKNOWN; + } } #define KVM_SSBD_UNKNOWN -1 diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 286f7e7e1be4..86825aa20852 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -18,7 +18,7 @@ #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ - asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\ + asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \ __mrs_s("%0", r##vh), \ ARM64_HAS_VIRT_HOST_EXTN) \ : "=r" (reg)); \ @@ -28,7 +28,7 @@ #define write_sysreg_elx(v,r,nvh,vh) \ do { \ u64 __val = (u64)(v); \ - asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\ + asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \ __msr_s(r##vh, "%x0"), \ ARM64_HAS_VIRT_HOST_EXTN) \ : : "rZ" (__val)); \ @@ -37,55 +37,15 @@ /* * Unified accessors for registers that have a different encoding * between VHE and non-VHE. They must be specified without their "ELx" - * encoding. + * encoding, but with the SYS_ prefix, as defined in asm/sysreg.h. 
*/ -#define read_sysreg_el2(r) \ - ({ \ - u64 reg; \ - asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\ - "mrs %0, " __stringify(r##_EL1),\ - ARM64_HAS_VIRT_HOST_EXTN) \ - : "=r" (reg)); \ - reg; \ - }) - -#define write_sysreg_el2(v,r) \ - do { \ - u64 __val = (u64)(v); \ - asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\ - "msr " __stringify(r##_EL1) ", %x0",\ - ARM64_HAS_VIRT_HOST_EXTN) \ - : : "rZ" (__val)); \ - } while (0) #define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02) #define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02) #define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12) #define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12) - -/* The VHE specific system registers and their encoding */ -#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0) -#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2) -#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0) -#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1) -#define tcr_EL12 sys_reg(3, 5, 2, 0, 2) -#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0) -#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1) -#define esr_EL12 sys_reg(3, 5, 5, 2, 0) -#define far_EL12 sys_reg(3, 5, 6, 0, 0) -#define mair_EL12 sys_reg(3, 5, 10, 2, 0) -#define amair_EL12 sys_reg(3, 5, 10, 3, 0) -#define vbar_EL12 sys_reg(3, 5, 12, 0, 0) -#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1) -#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0) -#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0) -#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1) -#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2) -#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0) -#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1) -#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2) -#define spsr_EL12 sys_reg(3, 5, 4, 0, 0) -#define elr_EL12 sys_reg(3, 5, 4, 0, 1) +#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1) +#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1) /** * hyp_alternate_select - Generates patchable code sequences that are diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index cdced518378d..14d0bc44d451 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -13,18 +13,23 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + #define check_pgt_cache() do { } while (0) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { + gfp_t gfp = GFP_PGTABLE_USER; struct page *page; - page = alloc_page(PGALLOC_GFP); + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + + page = alloc_page(gfp); if (!page) return NULL; if (!pgtable_pmd_page_ctor(page)) { @@ -61,7 +66,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pud_t *)__get_free_page(PGALLOC_GFP); + return (pud_t *)__get_free_page(GFP_PGTABLE_USER); } static inline void pud_free(struct mm_struct *mm, pud_t *pudp) @@ -89,42 +94,6 @@ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot) extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp); -static inline pte_t * -pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page(PGALLOC_GFP); -} - -static inline pgtable_t -pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_pages(PGALLOC_GFP, 0); - if (!pte) 
- return NULL; - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; -} - -/* - * Free a PTE table. - */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep) -{ - if (ptep) - free_page((unsigned long)ptep); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep, pmdval_t prot) { diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 30e5e67749e5..db92950bb1a0 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -115,7 +115,6 @@ * Level 2 descriptor (PMD). */ #define PMD_TYPE_MASK (_AT(pmdval_t, 3) << 0) -#define PMD_TYPE_FAULT (_AT(pmdval_t, 0) << 0) #define PMD_TYPE_TABLE (_AT(pmdval_t, 3) << 0) #define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0) #define PMD_TABLE_BIT (_AT(pmdval_t, 1) << 1) @@ -142,8 +141,8 @@ /* * Level 3 descriptor (PTE). */ +#define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_TYPE_MASK (_AT(pteval_t, 3) << 0) -#define PTE_TYPE_FAULT (_AT(pteval_t, 0) << 0) #define PTE_TYPE_PAGE (_AT(pteval_t, 3) << 0) #define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index c81583be034b..f318258a14be 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -13,7 +13,6 @@ /* * Software defined PTE bits definition. */ -#define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index fca26759081a..3052381baaeb 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -235,29 +235,42 @@ extern void __sync_icache_dcache(pte_t pteval); * * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) + +static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, + pte_t pte) { pte_t old_pte; - if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) - __sync_icache_dcache(pte); + if (!IS_ENABLED(CONFIG_DEBUG_VM)) + return; + + old_pte = READ_ONCE(*ptep); + + if (!pte_valid(old_pte) || !pte_valid(pte)) + return; + if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1) + return; /* - * If the existing pte is valid, check for potential race with - * hardware updates of the pte (ptep_set_access_flags safely changes - * valid ptes without going through an invalid entry). + * Check for potential race with hardware updates of the pte + * (ptep_set_access_flags safely changes valid ptes without going + * through an invalid entry). 
*/ - old_pte = READ_ONCE(*ptep); - if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) && - (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) { - VM_WARN_ONCE(!pte_young(pte), - "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); - VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), - "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); - } + VM_WARN_ONCE(!pte_young(pte), + "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); + VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), + "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(old_pte), pte_val(pte)); +} + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte); + + __check_racy_pte_update(mm, ptep, pte); set_pte(ptep, pte); } @@ -324,9 +337,14 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } -static inline pgprot_t mk_sect_prot(pgprot_t prot) +static inline pgprot_t mk_pud_sect_prot(pgprot_t prot) +{ + return __pgprot((pgprot_val(prot) & ~PUD_TABLE_BIT) | PUD_TYPE_SECT); +} + +static inline pgprot_t mk_pmd_sect_prot(pgprot_t prot) { - return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); + return __pgprot((pgprot_val(prot) & ~PMD_TABLE_BIT) | PMD_TYPE_SECT); } #ifdef CONFIG_NUMA_BALANCING diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index dad858b6adc6..b1dd039023ef 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -24,9 +24,15 @@ * means masking more IRQs (or at least that the same IRQs remain masked). * * To mask interrupts, we clear the most significant bit of PMR. + * + * Some code sections either automatically switch back to PSR.I or explicitly + * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included + * in the the priority mask, it indicates that PSR.I should be set and + * interrupt disabling temporarily does not rely on IRQ priorities. */ -#define GIC_PRIO_IRQON 0xf0 -#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define GIC_PRIO_IRQON 0xc0 +#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) +#define GIC_PRIO_PSR_I_SET (1 << 4) /* Additional SPSR bits not exposed in the UABI */ #define PSR_IL_BIT (1 << 20) @@ -217,11 +223,12 @@ static inline void forget_syscall(struct pt_regs *regs) #define fast_interrupts_enabled(regs) \ (!((regs)->pstate & PSR_F_BIT)) -#define GET_USP(regs) \ - (!compat_user_mode(regs) ? (regs)->sp : (regs)->compat_sp) - -#define SET_USP(ptregs, value) \ - (!compat_user_mode(regs) ? 
((regs)->sp = value) : ((regs)->compat_sp = value)) +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + if (compat_user_mode(regs)) + return regs->compat_sp; + return regs->sp; +} extern int regs_query_register_offset(const char *name); extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, @@ -320,13 +327,20 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, struct task_struct; int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); -#define GET_IP(regs) ((unsigned long)(regs)->pc) -#define SET_IP(regs, value) ((regs)->pc = ((u64) (value))) - -#define GET_FP(ptregs) ((unsigned long)(ptregs)->regs[29]) -#define SET_FP(ptregs, value) ((ptregs)->regs[29] = ((u64) (value))) +static inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return regs->pc; +} +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->pc = val; +} -#include <asm-generic/ptrace.h> +static inline unsigned long frame_pointer(struct pt_regs *regs) +{ + return regs->regs[29]; +} #define procedure_link_pointer(regs) ((regs)->regs[30]) @@ -336,7 +350,6 @@ static inline void procedure_link_pointer_set(struct pt_regs *regs, procedure_link_pointer(regs) = val; } -#undef profile_pc extern unsigned long profile_pc(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h index 0418c67f2b8b..bd43d1cf724b 100644 --- a/arch/arm64/include/asm/signal32.h +++ b/arch/arm64/include/asm/signal32.h @@ -9,6 +9,52 @@ #ifdef CONFIG_COMPAT #include <linux/compat.h> +struct compat_sigcontext { + /* We always set these two fields to 0 */ + compat_ulong_t trap_no; + compat_ulong_t error_code; + + compat_ulong_t oldmask; + compat_ulong_t arm_r0; + compat_ulong_t arm_r1; + compat_ulong_t arm_r2; + compat_ulong_t arm_r3; + compat_ulong_t arm_r4; + compat_ulong_t arm_r5; + compat_ulong_t arm_r6; + compat_ulong_t arm_r7; + compat_ulong_t arm_r8; + compat_ulong_t arm_r9; + compat_ulong_t arm_r10; + compat_ulong_t arm_fp; + compat_ulong_t arm_ip; + compat_ulong_t arm_sp; + compat_ulong_t arm_lr; + compat_ulong_t arm_pc; + compat_ulong_t arm_cpsr; + compat_ulong_t fault_address; +}; + +struct compat_ucontext { + compat_ulong_t uc_flags; + compat_uptr_t uc_link; + compat_stack_t uc_stack; + struct compat_sigcontext uc_mcontext; + compat_sigset_t uc_sigmask; + int __unused[32 - (sizeof(compat_sigset_t) / sizeof(int))]; + compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8))); +}; + +struct compat_sigframe { + struct compat_ucontext uc; + compat_ulong_t retcode[2]; +}; + +struct compat_rt_sigframe { + struct compat_siginfo info; + struct compat_sigframe sig; +}; + int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index 7e245b9e03a5..7434844036d3 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -12,9 +12,9 @@ #include <linux/preempt.h> #include <linux/types.h> -#ifdef CONFIG_KERNEL_MODE_NEON +DECLARE_PER_CPU(bool, fpsimd_context_busy); -DECLARE_PER_CPU(bool, kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * may_use_simd - whether it is allowable at this time to issue SIMD @@ -26,15 +26,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); static __must_check inline bool may_use_simd(void) { /* - * kernel_neon_busy is only 
set while preemption is disabled, + * fpsimd_context_busy is only set while preemption is disabled, * and is clear whenever preemption is enabled. Since - * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy + * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy * cannot change under our feet -- if it's set we cannot be * migrated, and if it's clear we cannot be migrated to a CPU * where it is set. */ return !in_irq() && !irqs_disabled() && !in_nmi() && - !this_cpu_read(kernel_neon_busy); + !this_cpu_read(fpsimd_context_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cd7f7ce1a56a..a7522fca1105 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -191,6 +191,9 @@ #define SYS_APGAKEYLO_EL1 sys_reg(3, 0, 2, 3, 0) #define SYS_APGAKEYHI_EL1 sys_reg(3, 0, 2, 3, 1) +#define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0) +#define SYS_ELR_EL1 sys_reg(3, 0, 4, 0, 1) + #define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) #define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0) @@ -382,6 +385,9 @@ #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) +#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1) +#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2) + #define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) #define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) #define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) @@ -392,14 +398,17 @@ #define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3)) #define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n)) -#define SYS_PMCCFILTR_EL0 sys_reg (3, 3, 14, 15, 7) +#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) - #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) +#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) +#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) #define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1) +#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0) #define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3) #define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0) +#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) #define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) #define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) @@ -444,7 +453,29 @@ #define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7) /* VHE encodings for architectural EL0/1 system registers */ +#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0) +#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2) #define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0) +#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0) +#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1) +#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2) +#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0) +#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1) +#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0) +#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1) +#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0) +#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0) +#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0) +#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0) +#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0) +#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1) +#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0) +#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0) +#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1) +#define SYS_CNTP_CVAL_EL02 sys_reg(3, 5, 14, 2, 2) +#define SYS_CNTV_TVAL_EL02 sys_reg(3, 5, 14, 3, 0) +#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1) +#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2) /* Common SCTLR_ELx flags. 
*/ #define SCTLR_ELx_DSSBS (_BITUL(44)) @@ -549,6 +580,7 @@ /* id_aa64isar1 */ #define ID_AA64ISAR1_SB_SHIFT 36 +#define ID_AA64ISAR1_FRINTTS_SHIFT 32 #define ID_AA64ISAR1_GPI_SHIFT 28 #define ID_AA64ISAR1_GPA_SHIFT 24 #define ID_AA64ISAR1_LRCPC_SHIFT 20 diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 2372e97db29c..180b34ec5965 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -65,6 +65,7 @@ void arch_release_task_struct(struct task_struct *tsk); * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace * TIF_SYSCALL_AUDIT - syscall auditing * TIF_SECCOMP - syscall secure computing + * TIF_SYSCALL_EMU - syscall emulation active * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user @@ -80,6 +81,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 #define TIF_SECCOMP 11 +#define TIF_SYSCALL_EMU 12 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -98,6 +100,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_FSCHECK (1 << TIF_FSCHECK) #define _TIF_32BIT (1 << TIF_32BIT) @@ -109,7 +112,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ - _TIF_NOHZ) + _TIF_NOHZ | _TIF_SYSCALL_EMU) #define INIT_THREAD_INFO(tsk) \ { \ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index c9f8dd421c5f..2629a68b8724 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -22,8 +22,13 @@ #define __NR_compat_exit 1 #define __NR_compat_read 3 #define __NR_compat_write 4 +#define __NR_compat_gettimeofday 78 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 +#define __NR_compat_clock_getres 247 +#define __NR_compat_clock_gettime 263 +#define __NR_compat_clock_gettime64 403 +#define __NR_compat_clock_getres_time64 406 /* * The following SVCs are ARM private. 
@@ -33,10 +38,11 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 434 +#define __NR_compat_syscalls 436 #endif #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #ifndef __COMPAT_SYSCALL_NR #include <uapi/asm/unistd.h> diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index aa995920bd34..94ab29cf4f00 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -875,6 +875,10 @@ __SYSCALL(__NR_fsconfig, sys_fsconfig) __SYSCALL(__NR_fsmount, sys_fsmount) #define __NR_fspick 433 __SYSCALL(__NR_fspick, sys_fspick) +#define __NR_pidfd_open 434 +__SYSCALL(__NR_pidfd_open, sys_pidfd_open) +#define __NR_clone3 435 +__SYSCALL(__NR_clone3, sys_clone3) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 1f94ec19903c..9c15e0a06301 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -17,6 +17,9 @@ #ifndef __ASSEMBLY__ #include <generated/vdso-offsets.h> +#ifdef CONFIG_COMPAT_VDSO +#include <generated/vdso32-offsets.h> +#endif #define VDSO_SYMBOL(base, name) \ ({ \ diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h new file mode 100644 index 000000000000..fb60a88b5ed4 --- /dev/null +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __COMPAT_BARRIER_H +#define __COMPAT_BARRIER_H + +#ifndef __ASSEMBLY__ +/* + * Warning: This code is meant to be used with + * ENABLE_COMPAT_VDSO only. + */ +#ifndef ENABLE_COMPAT_VDSO +#error This header is meant to be used with ENABLE_COMPAT_VDSO only +#endif + +#ifdef dmb +#undef dmb +#endif + +#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") + +#if __LINUX_ARM_ARCH__ >= 8 +#define aarch32_smp_mb() dmb(ish) +#define aarch32_smp_rmb() dmb(ishld) +#define aarch32_smp_wmb() dmb(ishst) +#else +#define aarch32_smp_mb() dmb(ish) +#define aarch32_smp_rmb() aarch32_smp_mb() +#define aarch32_smp_wmb() dmb(ishst) +#endif + + +#undef smp_mb +#undef smp_rmb +#undef smp_wmb + +#define smp_mb() aarch32_smp_mb() +#define smp_rmb() aarch32_smp_rmb() +#define smp_wmb() aarch32_smp_wmb() + +#endif /* !__ASSEMBLY__ */ + +#endif /* __COMPAT_BARRIER_H */ diff --git a/arch/arm64/include/asm/vdso/compat_gettimeofday.h b/arch/arm64/include/asm/vdso/compat_gettimeofday.h new file mode 100644 index 000000000000..f4812777f5c5 --- /dev/null +++ b/arch/arm64/include/asm/vdso/compat_gettimeofday.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <uapi/linux/time.h> + +#include <asm/vdso/compat_barrier.h> + +#define __VDSO_USE_SYSCALL ULLONG_MAX + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("r1") = _tz; + register struct __kernel_old_timeval *tv asm("r0") = _tv; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_gettimeofday; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +long 
clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_clock_gettime64; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("r1") = _ts; + register clockid_t clkid asm("r0") = _clkid; + register long ret asm ("r0"); + register long nr asm("r7") = __NR_compat_clock_getres_time64; + + /* The checks below are required for ABI consistency with arm */ + if ((_clkid >= MAX_CLOCKS) && (_ts == NULL)) + return -EINVAL; + + asm volatile( + " swi #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + u64 res; + + /* + * clock_mode == 0 implies that vDSO are enabled otherwise + * fallback on syscall. + */ + if (clock_mode) + return __VDSO_USE_SYSCALL; + + /* + * This isb() is required to prevent that the counter value + * is speculated. + */ + isb(); + asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (res)); + /* + * This isb() is required to prevent that the seq lock is + * speculated. + */ + isb(); + + return res; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + const struct vdso_data *ret; + + /* + * This simply puts &_vdso_data into ret. The reason why we don't use + * `ret = _vdso_data` is that the compiler tends to optimise this in a + * very suboptimal way: instead of keeping &_vdso_data in a register, + * it goes through a relocation almost every time _vdso_data must be + * accessed (even in subfunctions). This is both time and space + * consuming: each relocation uses a word in the code section, and it + * has to be loaded at runtime. + * + * This trick hides the assignment from the compiler. Since it cannot + * track where the pointer comes from, it will only use one relocation + * where __arch_get_vdso_data() is called, and then keep the result in + * a register. 
+ */ + asm volatile("mov %0, %1" : "=r"(ret) : "r"(_vdso_data)); + + return ret; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..b08f476b72b4 --- /dev/null +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <uapi/linux/time.h> + +#define __VDSO_USE_SYSCALL ULLONG_MAX + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline +int gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("x1") = _tz; + register struct __kernel_old_timeval *tv asm("x0") = _tv; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_gettimeofday; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("x1") = _ts; + register clockid_t clkid asm("x0") = _clkid; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_clock_gettime; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline +int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("x1") = _ts; + register clockid_t clkid asm("x0") = _clkid; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_clock_getres; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + u64 res; + + /* + * clock_mode == 0 implies that vDSO are enabled otherwise + * fallback on syscall. + */ + if (clock_mode) + return __VDSO_USE_SYSCALL; + + /* + * This isb() is required to prevent that the counter value + * is speculated. + */ + isb(); + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); + /* + * This isb() is required to prevent that the seq lock is + * speculated.# + */ + isb(); + + return res; +} + +static __always_inline +const struct vdso_data *__arch_get_vdso_data(void) +{ + return _vdso_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..0c731bfc7c8c --- /dev/null +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> + +#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) + +extern struct vdso_data *vdso_data; + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. 
+ */ +static __always_inline +struct vdso_data *__arm64_get_k_vdso_data(void) +{ + return vdso_data; +} +#define __arch_get_k_vdso_data __arm64_get_k_vdso_data + +static __always_inline +int __arm64_get_clock_mode(struct timekeeper *tk) +{ + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; + + return use_syscall; +} +#define __arch_get_clock_mode __arm64_get_clock_mode + +static __always_inline +int __arm64_use_vsyscall(struct vdso_data *vdata) +{ + return !vdata[CS_HRES_COARSE].clock_mode; +} +#define __arch_use_vsyscall __arm64_use_vsyscall + +static __always_inline +void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) +{ + vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; + vdata[CS_RAW].mask = VDSO_PRECISION_MASK; +} +#define __arch_update_vsyscall __arm64_update_vsyscall + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 1a772b162191..a1e72886b30c 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -63,5 +63,7 @@ #define HWCAP2_SVEBITPERM (1 << 4) #define HWCAP2_SVESHA3 (1 << 5) #define HWCAP2_SVESM4 (1 << 6) +#define HWCAP2_FLAGM2 (1 << 7) +#define HWCAP2_FRINT (1 << 8) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index d819a3e8b552..9a507716ae2f 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -229,6 +229,16 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_FW | ((r) & 0xffff)) #define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index e932284993d4..7ed9294e2004 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -62,6 +62,9 @@ #define PSR_x 0x0000ff00 /* Extension */ #define PSR_c 0x000000ff /* Control */ +/* syscall emulation path in ptrace */ +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 3d448a0bb225..8b0ebce92427 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -146,7 +146,7 @@ struct sve_context { * vector length beyond its initial architectural limit of 2048 bits * (16 quadwords). * - * See linux/Documentation/arm64/sve.txt for a description of the VL/VQ + * See linux/Documentation/arm64/sve.rst for a description of the VL/VQ * terminology. 
*/ #define SVE_VQ_BYTES __SVE_VQ_BYTES /* bytes per quadword */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 9e7dcb2c31c7..478491f07b4f 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -28,7 +28,10 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ - sigreturn32.o sys_compat.o + sys_compat.o +ifneq ($(CONFIG_COMPAT_VDSO), y) +obj-$(CONFIG_COMPAT) += sigreturn32.o +endif obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o @@ -62,6 +65,7 @@ obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-y += vdso/ probes/ +obj-$(CONFIG_COMPAT_VDSO) += vdso32/ head-y := head.o extra-y += $(head-y) vmlinux.lds diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 2804330c95dc..3a58e9db5cfe 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -152,10 +152,14 @@ static int __init acpi_fadt_sanity_check(void) */ if (table->revision < 5 || (table->revision == 5 && fadt->minor_revision < 1)) { - pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n", + pr_err(FW_BUG "Unsupported FADT revision %d.%d, should be 5.1+\n", table->revision, fadt->minor_revision); - ret = -EINVAL; - goto out; + + if (!fadt->arm_boot_flags) { + ret = -EINVAL; + goto out; + } + pr_err("FADT has ARM boot flags set, assuming 5.1\n"); } if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) { diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 02f08768c298..214685760e1c 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -18,9 +18,9 @@ #include <asm/fixmap.h> #include <asm/thread_info.h> #include <asm/memory.h> +#include <asm/signal32.h> #include <asm/smp_plat.h> #include <asm/suspend.h> -#include <asm/vdso_datapage.h> #include <linux/kbuild.h> #include <linux/arm-smccc.h> @@ -66,6 +66,11 @@ int main(void) DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); +#ifdef CONFIG_COMPAT + DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0)); + DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0)); + BLANK(); +#endif DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); @@ -80,33 +85,6 @@ int main(void) BLANK(); DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); BLANK(); - DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); - DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res)); - DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); - DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); - DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - BLANK(); - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - 
DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); - DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); - BLANK(); - DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); - BLANK(); - DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); - DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); - BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 880d79904d36..7fa6828bb488 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,6 +17,15 @@ #define CLIDR_CTYPE(clidr, level) \ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) +int cache_line_size(void) +{ + if (coherency_max_size != 0) + return coherency_max_size; + + return cache_line_size_of_cpu(); +} +EXPORT_SYMBOL_GPL(cache_line_size); + static inline enum cache_type get_cache_type(int level) { u64 clidr; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ca11ff7bf55e..1e43ba5c79b7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -554,6 +554,17 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) static bool __hardenbp_enab = true; static bool __spectrev2_safe = true; +int get_spectre_v2_workaround_state(void) +{ + if (__spectrev2_safe) + return ARM64_BP_HARDEN_NOT_REQUIRED; + + if (!__hardenbp_enab) + return ARM64_BP_HARDEN_UNKNOWN; + + return ARM64_BP_HARDEN_WA_NEEDED; +} + /* * List of CPUs that do not need any Spectre-v2 mitigation at all. 
*/ @@ -854,13 +865,15 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - if (__spectrev2_safe) + switch (get_spectre_v2_workaround_state()) { + case ARM64_BP_HARDEN_NOT_REQUIRED: return sprintf(buf, "Not affected\n"); - - if (__hardenbp_enab) + case ARM64_BP_HARDEN_WA_NEEDED: return sprintf(buf, "Mitigation: Branch predictor hardening\n"); - - return sprintf(buf, "Vulnerable\n"); + case ARM64_BP_HARDEN_UNKNOWN: + default: + return sprintf(buf, "Vulnerable\n"); + } } ssize_t cpu_show_spec_store_bypass(struct device *dev, diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index aabdabf52fdb..f29f36a65175 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1184,14 +1184,14 @@ static struct undef_hook ssbs_emulation_hook = { static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) { static bool undef_hook_registered = false; - static DEFINE_SPINLOCK(hook_lock); + static DEFINE_RAW_SPINLOCK(hook_lock); - spin_lock(&hook_lock); + raw_spin_lock(&hook_lock); if (!undef_hook_registered) { register_undef_hook(&ssbs_emulation_hook); undef_hook_registered = true; } - spin_unlock(&hook_lock); + raw_spin_unlock(&hook_lock); if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); @@ -1618,6 +1618,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), @@ -1629,6 +1630,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 0593665fc7b4..876055e37352 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -82,6 +82,8 @@ static const char *const hwcap_str[] = { "svebitperm", "svesha3", "svesm4", + "flagm2", + "frint", NULL }; diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 3c33d0dd8e0e..d0cf596db82c 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -82,8 +82,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t 
*md) return 0; } -static int __init set_permissions(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) +static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) { efi_memory_desc_t *md = data; pte_t pte = READ_ONCE(*ptep); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2df8d0a1d980..9cdc4592da3e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -247,6 +247,7 @@ alternative_else_nop_endif /* * Registers that may be useful after this macro is invoked: * + * x20 - ICC_PMR_EL1 * x21 - aborted SP * x22 - aborted PC * x23 - aborted PSTATE @@ -424,6 +425,38 @@ tsk .req x28 // current thread_info irq_stack_exit .endm +#ifdef CONFIG_ARM64_PSEUDO_NMI + /* + * Set res to 0 if irqs were unmasked in interrupted context. + * Otherwise set res to non-0 value. + */ + .macro test_irqs_unmasked res:req, pmr:req +alternative_if ARM64_HAS_IRQ_PRIO_MASKING + sub \res, \pmr, #GIC_PRIO_IRQON +alternative_else + mov \res, xzr +alternative_endif + .endm +#endif + + .macro gic_prio_kentry_setup, tmp:req +#ifdef CONFIG_ARM64_PSEUDO_NMI + alternative_if ARM64_HAS_IRQ_PRIO_MASKING + mov \tmp, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON) + msr_s SYS_ICC_PMR_EL1, \tmp + alternative_else_nop_endif +#endif + .endm + + .macro gic_prio_irq_setup, pmr:req, tmp:req +#ifdef CONFIG_ARM64_PSEUDO_NMI + alternative_if ARM64_HAS_IRQ_PRIO_MASKING + orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET + msr_s SYS_ICC_PMR_EL1, \tmp + alternative_else_nop_endif +#endif + .endm + .text /* @@ -602,6 +635,7 @@ el1_dbg: cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only + gic_prio_kentry_setup tmp=x3 mrs x0, far_el1 mov x2, sp // struct pt_regs bl do_debug_exception @@ -619,20 +653,18 @@ ENDPROC(el1_sync) .align 6 el1_irq: kernel_entry 1 + gic_prio_irq_setup pmr=x20, tmp=x1 enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS + #ifdef CONFIG_ARM64_PSEUDO_NMI -alternative_if ARM64_HAS_IRQ_PRIO_MASKING - ldr x20, [sp, #S_PMR_SAVE] -alternative_else - mov x20, #GIC_PRIO_IRQON -alternative_endif - cmp x20, #GIC_PRIO_IRQOFF - /* Irqs were disabled, don't trace */ - b.ls 1f + test_irqs_unmasked res=x0, pmr=x20 + cbz x0, 1f + bl asm_nmi_enter +1: #endif + +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off -1: #endif irq_handler @@ -651,14 +683,23 @@ alternative_else_nop_endif bl preempt_schedule_irq // irq en/disable is done inside 1: #endif -#ifdef CONFIG_TRACE_IRQFLAGS + #ifdef CONFIG_ARM64_PSEUDO_NMI /* - * if IRQs were disabled when we received the interrupt, we have an NMI - * and we are not re-enabling interrupt upon eret. Skip tracing. + * When using IRQ priority masking, we can get spurious interrupts while + * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a + * section with interrupts disabled. Skip tracing in those cases. 
*/ - cmp x20, #GIC_PRIO_IRQOFF - b.ls 1f + test_irqs_unmasked res=x0, pmr=x20 + cbz x0, 1f + bl asm_nmi_exit +1: +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_ARM64_PSEUDO_NMI + test_irqs_unmasked res=x0, pmr=x20 + cbnz x0, 1f #endif bl trace_hardirqs_on 1: @@ -776,6 +817,7 @@ el0_ia: * Instruction abort handling */ mrs x26, far_el1 + gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -821,6 +863,7 @@ el0_sp_pc: * Stack or PC alignment exception handling */ mrs x26, far_el1 + gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -855,11 +898,12 @@ el0_dbg: * Debug exception handling */ tbnz x24, #0, el0_inv // EL0 only + gic_prio_kentry_setup tmp=x3 mrs x0, far_el1 mov x1, x25 mov x2, sp bl do_debug_exception - enable_daif + enable_da_f ct_user_exit b ret_to_user el0_inv: @@ -876,7 +920,9 @@ ENDPROC(el0_sync) el0_irq: kernel_entry 0 el0_irq_naked: + gic_prio_irq_setup pmr=x20, tmp=x0 enable_da_f + #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif @@ -898,6 +944,7 @@ ENDPROC(el0_irq) el1_error: kernel_entry 1 mrs x1, esr_el1 + gic_prio_kentry_setup tmp=x2 enable_dbg mov x0, sp bl do_serror @@ -908,10 +955,11 @@ el0_error: kernel_entry 0 el0_error_naked: mrs x1, esr_el1 + gic_prio_kentry_setup tmp=x2 enable_dbg mov x0, sp bl do_serror - enable_daif + enable_da_f ct_user_exit b ret_to_user ENDPROC(el0_error) @@ -932,6 +980,7 @@ work_pending: */ ret_to_user: disable_daif + gic_prio_kentry_setup tmp=x3 ldr x1, [tsk, #TSK_TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending @@ -948,6 +997,7 @@ ENDPROC(ret_to_user) */ .align 6 el0_svc: + gic_prio_kentry_setup tmp=x1 mov x0, sp bl el0_svc_handler b ret_to_user diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0cfcf5c237c5..eec4776ae5f0 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -82,7 +82,8 @@ * To prevent this from racing with the manipulation of the task's FPSIMD state * from task context and thereby corrupting the state, it is necessary to * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE - * flag with local_bh_disable() unless softirqs are already masked. + * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to + * run but prevent them to use FPSIMD. * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_cpu field @@ -145,6 +146,56 @@ extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ +DEFINE_PER_CPU(bool, fpsimd_context_busy); +EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); + +static void __get_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, true); + + WARN_ON(busy); +} + +/* + * Claim ownership of the CPU FPSIMD context for use by the calling context. + * + * The caller may freely manipulate the FPSIMD context metadata until + * put_cpu_fpsimd_context() is called. + * + * The double-underscore version must only be called if you know the task + * can't be preempted. + */ +static void get_cpu_fpsimd_context(void) +{ + preempt_disable(); + __get_cpu_fpsimd_context(); +} + +static void __put_cpu_fpsimd_context(void) +{ + bool busy = __this_cpu_xchg(fpsimd_context_busy, false); + + WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ +} + +/* + * Release the CPU FPSIMD context. 
+ * + * Must be called from a context in which get_cpu_fpsimd_context() was + * previously called, with no call to put_cpu_fpsimd_context() in the + * meantime. + */ +static void put_cpu_fpsimd_context(void) +{ + __put_cpu_fpsimd_context(); + preempt_enable(); +} + +static bool have_cpu_fpsimd_context(void) +{ + return !preemptible() && __this_cpu_read(fpsimd_context_busy); +} + /* * Call __sve_free() directly only if you know task can't be scheduled * or preempted. @@ -215,12 +266,10 @@ static void sve_free(struct task_struct *task) * This function should be called only when the FPSIMD/SVE state in * thread_struct is known to be up to date, when preparing to enter * userspace. - * - * Softirqs (and preemption) must be disabled. */ static void task_fpsimd_load(void) { - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!have_cpu_fpsimd_context()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), @@ -233,16 +282,14 @@ static void task_fpsimd_load(void) /* * Ensure FPSIMD/SVE storage in memory for the loaded context is up to * date with respect to the CPU registers. - * - * Softirqs (and preemption) must be disabled. */ -void fpsimd_save(void) +static void fpsimd_save(void) { struct fpsimd_last_state_struct const *last = this_cpu_ptr(&fpsimd_last_state); /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ - WARN_ON(!in_softirq() && !irqs_disabled()); + WARN_ON(!have_cpu_fpsimd_context()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { if (system_supports_sve() && test_thread_flag(TIF_SVE)) { @@ -364,7 +411,8 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x) * task->thread.sve_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.uw.fpsimd_state must be up to date before calling this @@ -393,7 +441,8 @@ static void fpsimd_to_sve(struct task_struct *task) * task->thread.uw.fpsimd_state. * * Task can be a non-runnable task, or current. In the latter case, - * softirqs (and preemption) must be disabled. + * the caller must have ownership of the cpu FPSIMD context before calling + * this function. * task->thread.sve_state must point to at least sve_state_size(task) * bytes of allocated kernel memory. * task->thread.sve_state must be up to date before calling this function. @@ -557,7 +606,7 @@ int sve_set_vector_length(struct task_struct *task, * non-SVE thread. 
*/ if (task == current) { - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); } @@ -567,7 +616,7 @@ int sve_set_vector_length(struct task_struct *task, sve_to_fpsimd(task); if (task == current) - local_bh_enable(); + put_cpu_fpsimd_context(); /* * Force reallocation of task SVE state to the correct size @@ -880,7 +929,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) sve_alloc(current); - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); @@ -891,7 +940,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) if (test_and_set_thread_flag(TIF_SVE)) WARN_ON(1); /* SVE access shouldn't have trapped */ - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -935,6 +984,8 @@ void fpsimd_thread_switch(struct task_struct *next) if (!system_supports_fpsimd()) return; + __get_cpu_fpsimd_context(); + /* Save unsaved fpsimd state, if any: */ fpsimd_save(); @@ -949,6 +1000,8 @@ void fpsimd_thread_switch(struct task_struct *next) update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, wrong_task || wrong_cpu); + + __put_cpu_fpsimd_context(); } void fpsimd_flush_thread(void) @@ -958,7 +1011,7 @@ void fpsimd_flush_thread(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_flush_task_state(current); memset(¤t->thread.uw.fpsimd_state, 0, @@ -999,7 +1052,7 @@ void fpsimd_flush_thread(void) current->thread.sve_vl_onexec = 0; } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1011,9 +1064,9 @@ void fpsimd_preserve_current_state(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); fpsimd_save(); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1030,7 +1083,8 @@ void fpsimd_signal_preserve_current_state(void) /* * Associate current's FPSIMD context with this cpu - * Preemption must be disabled when calling this function. + * The caller must have ownership of the cpu FPSIMD context before calling + * this function. */ void fpsimd_bind_task_to_cpu(void) { @@ -1076,14 +1130,14 @@ void fpsimd_restore_current_state(void) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { task_fpsimd_load(); fpsimd_bind_task_to_cpu(); } - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1096,7 +1150,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) if (!system_supports_fpsimd()) return; - local_bh_disable(); + get_cpu_fpsimd_context(); current->thread.uw.fpsimd_state = *state; if (system_supports_sve() && test_thread_flag(TIF_SVE)) @@ -1107,7 +1161,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) clear_thread_flag(TIF_FOREIGN_FPSTATE); - local_bh_enable(); + put_cpu_fpsimd_context(); } /* @@ -1133,18 +1187,29 @@ void fpsimd_flush_task_state(struct task_struct *t) /* * Invalidate any task's FPSIMD state that is present on this cpu. - * This function must be called with softirqs disabled. + * The FPSIMD context should be acquired with get_cpu_fpsimd_context() + * before calling this function. */ -void fpsimd_flush_cpu_state(void) +static void fpsimd_flush_cpu_state(void) { __this_cpu_write(fpsimd_last_state.st, NULL); set_thread_flag(TIF_FOREIGN_FPSTATE); } -#ifdef CONFIG_KERNEL_MODE_NEON +/* + * Save the FPSIMD state to memory and invalidate cpu view. + * This function must be called with preemption disabled. 
+ */ +void fpsimd_save_and_flush_cpu_state(void) +{ + WARN_ON(preemptible()); + __get_cpu_fpsimd_context(); + fpsimd_save(); + fpsimd_flush_cpu_state(); + __put_cpu_fpsimd_context(); +} -DEFINE_PER_CPU(bool, kernel_neon_busy); -EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); +#ifdef CONFIG_KERNEL_MODE_NEON /* * Kernel-side NEON support functions @@ -1170,19 +1235,13 @@ void kernel_neon_begin(void) BUG_ON(!may_use_simd()); - local_bh_disable(); - - __this_cpu_write(kernel_neon_busy, true); + get_cpu_fpsimd_context(); /* Save unsaved fpsimd state, if any: */ fpsimd_save(); /* Invalidate any task state remaining in the fpsimd regs: */ fpsimd_flush_cpu_state(); - - preempt_disable(); - - local_bh_enable(); } EXPORT_SYMBOL(kernel_neon_begin); @@ -1197,15 +1256,10 @@ EXPORT_SYMBOL(kernel_neon_begin); */ void kernel_neon_end(void) { - bool busy; - if (!system_supports_fpsimd()) return; - busy = __this_cpu_xchg(kernel_neon_busy, false); - WARN_ON(!busy); /* No matching kernel_neon_begin()? */ - - preempt_enable(); + put_cpu_fpsimd_context(); } EXPORT_SYMBOL(kernel_neon_end); @@ -1297,8 +1351,7 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); break; case CPU_PM_EXIT: break; diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 04ca08086d35..2b85c0d6fa3d 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -67,7 +67,11 @@ #ifdef CONFIG_EFI -__efistub_stext_offset = stext - _text; +/* + * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol: + * https://github.com/ClangBuiltLinux/linux/issues/561 + */ +__efistub_stext_offset = ABSOLUTE(stext - _text); /* * The EFI stub has its own symbol namespace prefixed by __efistub_, to diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index c70034fbd4ce..04a327ccf84d 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -16,8 +16,10 @@ #include <linux/smp.h> #include <linux/init.h> #include <linux/irqchip.h> +#include <linux/kprobes.h> #include <linux/seq_file.h> #include <linux/vmalloc.h> +#include <asm/daifflags.h> #include <asm/vmap_stack.h> unsigned long irq_err_count; @@ -64,4 +66,28 @@ void __init init_IRQ(void) irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); + + if (system_uses_irq_prio_masking()) { + /* + * Now that we have a stack for our IRQ handler, set + * the PMR/PSR pair to a consistent state. + */ + WARN_ON(read_sysreg(daif) & PSR_A_BIT); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + } +} + +/* + * Stubs to make nmi_enter/exit() code callable from ASM + */ +asmlinkage void notrace asm_nmi_enter(void) +{ + nmi_enter(); +} +NOKPROBE_SYMBOL(asm_nmi_enter); + +asmlinkage void notrace asm_nmi_exit(void) +{ + nmi_exit(); } +NOKPROBE_SYMBOL(asm_nmi_exit); diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 07bf740bea91..2514fd6f12cb 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -53,7 +53,7 @@ static void *image_load(struct kimage *image, /* * We require a kernel with an unambiguous Image header. Per - * Documentation/booting.txt, this is the case when image_size + * Documentation/arm64/booting.rst, this is the case when image_size * is non-zero (practically speaking, since v3.17). 
*/ h = (struct arm64_image_header *)kernel; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index e23a68a5808f..46e643e30708 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -21,6 +21,7 @@ void *module_alloc(unsigned long size) { + u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; gfp_t gfp_mask = GFP_KERNEL; void *p; @@ -28,9 +29,12 @@ void *module_alloc(unsigned long size) if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) gfp_mask |= __GFP_NOWARN; + if (IS_ENABLED(CONFIG_KASAN)) + /* don't exceed the static module region - see below */ + module_alloc_end = MODULES_END; + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_base + MODULES_VSIZE, - gfp_mask, PAGE_KERNEL_EXEC, 0, + module_alloc_end, gfp_mask, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && @@ -46,7 +50,7 @@ void *module_alloc(unsigned long size) */ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + SZ_2G, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); if (p && (kasan_module_alloc(p, size) < 0)) { diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 88ce502c8e6f..bd5dfffca272 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -122,8 +122,10 @@ void *alloc_insn_page(void) void *page; page = vmalloc_exec(PAGE_SIZE); - if (page) + if (page) { set_memory_ro((unsigned long)page, 1); + set_vm_flush_reset_perms(page); + } return page; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 9856395ccdb7..6a869d9f304f 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -83,7 +83,7 @@ static void __cpu_do_idle_irqprio(void) * be raised. */ pmr = gic_read_pmr(); - gic_write_pmr(GIC_PRIO_IRQON); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); __cpu_do_idle(); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index da2441d7b066..3cf3b135027e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1808,8 +1808,12 @@ static void tracehook_report_syscall(struct pt_regs *regs, int syscall_trace_enter(struct pt_regs *regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (test_thread_flag(TIF_SYSCALL_TRACE) || + test_thread_flag(TIF_SYSCALL_EMU)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + if (!in_syscall(regs) || test_thread_flag(TIF_SYSCALL_EMU)) + return -1; + } /* Do the secure computing after ptrace; failures should be fast. */ if (secure_computing(NULL) == -1) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7e541f947b4c..9c4bad7d7131 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -283,6 +283,11 @@ void __init setup_arch(char **cmdline_p) setup_machine_fdt(__fdt_pointer); + /* + * Initialise the static keys early as they may be enabled by the + * cpufeature code and early parameters. 
+ */ + jump_label_init(); parse_early_param(); /* diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 331d1e5acad4..12a585386c2f 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -18,42 +18,7 @@ #include <asm/traps.h> #include <linux/uaccess.h> #include <asm/unistd.h> - -struct compat_sigcontext { - /* We always set these two fields to 0 */ - compat_ulong_t trap_no; - compat_ulong_t error_code; - - compat_ulong_t oldmask; - compat_ulong_t arm_r0; - compat_ulong_t arm_r1; - compat_ulong_t arm_r2; - compat_ulong_t arm_r3; - compat_ulong_t arm_r4; - compat_ulong_t arm_r5; - compat_ulong_t arm_r6; - compat_ulong_t arm_r7; - compat_ulong_t arm_r8; - compat_ulong_t arm_r9; - compat_ulong_t arm_r10; - compat_ulong_t arm_fp; - compat_ulong_t arm_ip; - compat_ulong_t arm_sp; - compat_ulong_t arm_lr; - compat_ulong_t arm_pc; - compat_ulong_t arm_cpsr; - compat_ulong_t fault_address; -}; - -struct compat_ucontext { - compat_ulong_t uc_flags; - compat_uptr_t uc_link; - compat_stack_t uc_stack; - struct compat_sigcontext uc_mcontext; - compat_sigset_t uc_sigmask; - int __unused[32 - (sizeof (compat_sigset_t) / sizeof (int))]; - compat_ulong_t uc_regspace[128] __attribute__((__aligned__(8))); -}; +#include <asm/vdso.h> struct compat_vfp_sigframe { compat_ulong_t magic; @@ -81,16 +46,6 @@ struct compat_aux_sigframe { unsigned long end_magic; } __attribute__((__aligned__(8))); -struct compat_sigframe { - struct compat_ucontext uc; - compat_ulong_t retcode[2]; -}; - -struct compat_rt_sigframe { - struct compat_siginfo info; - struct compat_sigframe sig; -}; - #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) @@ -387,6 +342,30 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = ptr_to_compat(ka->sa.sa_restorer); } else { /* Set up sigreturn pointer */ +#ifdef CONFIG_COMPAT_VDSO + void *vdso_base = current->mm->context.vdso; + void *vdso_trampoline; + + if (ka->sa.sa_flags & SA_SIGINFO) { + if (thumb) { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_rt_sigreturn_thumb); + } else { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_rt_sigreturn_arm); + } + } else { + if (thumb) { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_sigreturn_thumb); + } else { + vdso_trampoline = VDSO_SYMBOL(vdso_base, + compat_sigreturn_arm); + } + } + + retcode = ptr_to_compat(vdso_trampoline) + thumb; +#else unsigned int idx = thumb << 1; if (ka->sa.sa_flags & SA_SIGINFO) @@ -394,6 +373,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, retcode = (unsigned long)current->mm->context.vdso + (idx << 2) + thumb; +#endif } regs->regs[0] = usig; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 3e53ffa07994..f5b04dd8a710 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -27,7 +27,7 @@ * aff0 = mpidr_masked & 0xff; * aff1 = mpidr_masked & 0xff00; * aff2 = mpidr_masked & 0xff0000; - * aff2 = mpidr_masked & 0xff00000000; + * aff3 = mpidr_masked & 0xff00000000; * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); *} * Input registers: rs0, rs1, rs2, rs3, mpidr, mask diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6dcf9607d770..ea90d3bd9253 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -181,11 +181,7 @@ static void init_gic_priority_masking(void) WARN_ON(!(cpuflags & PSR_I_BIT)); - gic_write_pmr(GIC_PRIO_IRQOFF); - - 
/* We can only unmask PSR.I if we can take aborts */ - if (!(cpuflags & PSR_A_BIT)) - write_sysreg(cpuflags & ~PSR_I_BIT, daif); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); } /* @@ -424,11 +420,6 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { set_my_cpu_offset(per_cpu_offset(smp_processor_id())); - /* - * Initialise the static keys early as they may be enabled by the - * cpufeature code. - */ - jump_label_init(); cpuinfo_store_boot_cpu(); /* @@ -834,18 +825,23 @@ void arch_irq_work_raise(void) } #endif -/* - * ipi_cpu_stop - handle IPI from smp_send_stop() - */ -static void ipi_cpu_stop(unsigned int cpu) +static void local_cpu_stop(void) { - set_cpu_online(cpu, false); + set_cpu_online(smp_processor_id(), false); local_daif_mask(); sdei_mask_local_cpu(); + cpu_park_loop(); +} - while (1) - cpu_relax(); +/* + * We need to implement panic_smp_self_stop() for parallel panic() calls, so + * that cpu_online_mask gets correctly updated and smp_send_stop() can skip + * CPUs that have already stopped themselves. + */ +void panic_smp_self_stop(void) +{ + local_cpu_stop(); } #ifdef CONFIG_KEXEC_CORE @@ -898,7 +894,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) case IPI_CPU_STOP: irq_enter(); - ipi_cpu_stop(cpu); + local_cpu_stop(); irq_exit(); break; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 985721a1264c..8c03456dade6 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -55,16 +55,19 @@ static void dump_backtrace_entry(unsigned long where) printk(" %pS\n", (void *)where); } -static void __dump_instr(const char *lvl, struct pt_regs *regs) +static void dump_kernel_instr(const char *lvl, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str; int i; + if (user_mode(regs)) + return; + for (i = -4; i < 1; i++) { unsigned int val, bad; - bad = get_user(val, &((u32 *)addr)[i]); + bad = aarch64_insn_read(&((u32 *)addr)[i], &val); if (!bad) p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val); @@ -73,19 +76,8 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs) break; } } - printk("%sCode: %s\n", lvl, str); -} -static void dump_instr(const char *lvl, struct pt_regs *regs) -{ - if (!user_mode(regs)) { - mm_segment_t fs = get_fs(); - set_fs(KERNEL_DS); - __dump_instr(lvl, regs); - set_fs(fs); - } else { - __dump_instr(lvl, regs); - } + printk("%sCode: %s\n", lvl, str); } void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) @@ -171,8 +163,7 @@ static int __die(const char *str, int err, struct pt_regs *regs) print_modules(); show_regs(regs); - if (!user_mode(regs)) - dump_instr(KERN_EMERG, regs); + dump_kernel_instr(KERN_EMERG, regs); return ret; } @@ -242,16 +233,16 @@ void arm64_force_sig_fault(int signo, int code, void __user *addr, { arm64_show_signal(signo, str); if (signo == SIGKILL) - force_sig(SIGKILL, current); + force_sig(SIGKILL); else - force_sig_fault(signo, code, addr, current); + force_sig_fault(signo, code, addr); } void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str) { arm64_show_signal(SIGBUS, str); - force_sig_mceerr(code, addr, lsb, current); + force_sig_mceerr(code, addr, lsb); } void arm64_force_sig_ptrace_errno_trap(int errno, void __user *addr, @@ -880,6 +871,10 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) /* * The CPU can't make progress. The exception may have * been imprecise. 
+ * + * Neoverse-N1 #1349291 means a non-KVM SError reported as + * Unrecoverable should be treated as Uncontainable. We + * call arm64_serror_panic() in both cases. */ return true; diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 663b166241d0..354b11e27c07 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -20,41 +20,212 @@ #include <linux/slab.h> #include <linux/timekeeper_internal.h> #include <linux/vmalloc.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> +#include <vdso/vsyscall.h> #include <asm/cacheflush.h> #include <asm/signal32.h> #include <asm/vdso.h> -#include <asm/vdso_datapage.h> extern char vdso_start[], vdso_end[]; -static unsigned long vdso_pages __ro_after_init; +#ifdef CONFIG_COMPAT_VDSO +extern char vdso32_start[], vdso32_end[]; +#endif /* CONFIG_COMPAT_VDSO */ + +/* vdso_lookup arch_index */ +enum arch_vdso_type { + ARM64_VDSO = 0, +#ifdef CONFIG_COMPAT_VDSO + ARM64_VDSO32 = 1, +#endif /* CONFIG_COMPAT_VDSO */ +}; +#ifdef CONFIG_COMPAT_VDSO +#define VDSO_TYPES (ARM64_VDSO32 + 1) +#else +#define VDSO_TYPES (ARM64_VDSO + 1) +#endif /* CONFIG_COMPAT_VDSO */ + +struct __vdso_abi { + const char *name; + const char *vdso_code_start; + const char *vdso_code_end; + unsigned long vdso_pages; + /* Data Mapping */ + struct vm_special_mapping *dm; + /* Code Mapping */ + struct vm_special_mapping *cm; +}; + +static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = { + { + .name = "vdso", + .vdso_code_start = vdso_start, + .vdso_code_end = vdso_end, + }, +#ifdef CONFIG_COMPAT_VDSO + { + .name = "vdso32", + .vdso_code_start = vdso32_start, + .vdso_code_end = vdso32_end, + }, +#endif /* CONFIG_COMPAT_VDSO */ +}; /* * The vDSO data page. */ static union { - struct vdso_data data; + struct vdso_data data[CS_BASES]; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_data *vdso_data = vdso_data_store.data; + +static int __vdso_remap(enum arch_vdso_type arch_index, + const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end - + vdso_lookup[arch_index].vdso_code_start; + + if (vdso_size != new_size) + return -EINVAL; + + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + +static int __vdso_init(enum arch_vdso_type arch_index) +{ + int i; + struct page **vdso_pagelist; + unsigned long pfn; + + if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; + } + + vdso_lookup[arch_index].vdso_pages = ( + vdso_lookup[arch_index].vdso_code_end - + vdso_lookup[arch_index].vdso_code_start) >> + PAGE_SHIFT; + + /* Allocate the vDSO pagelist, plus a page for the data. */ + vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1, + sizeof(struct page *), + GFP_KERNEL); + if (vdso_pagelist == NULL) + return -ENOMEM; + + /* Grab the vDSO data page. */ + vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); + + + /* Grab the vDSO code pages. 
*/ + pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start); + + for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++) + vdso_pagelist[i + 1] = pfn_to_page(pfn + i); + + vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0]; + vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1]; + + return 0; +} + +static int __setup_additional_pages(enum arch_vdso_type arch_index, + struct mm_struct *mm, + struct linux_binprm *bprm, + int uses_interp) +{ + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + void *ret; + + vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT; + /* Be sure to map the data page */ + vdso_mapping_len = vdso_text_len + PAGE_SIZE; + + vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + ret = ERR_PTR(vdso_base); + goto up_fail; + } + + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + vdso_lookup[arch_index].dm); + if (IS_ERR(ret)) + goto up_fail; + + vdso_base += PAGE_SIZE; + mm->context.vdso = (void *)vdso_base; + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_lookup[arch_index].cm); + if (IS_ERR(ret)) + goto up_fail; + + return 0; + +up_fail: + mm->context.vdso = NULL; + return PTR_ERR(ret); +} #ifdef CONFIG_COMPAT /* * Create and map the vectors page for AArch32 tasks. */ +#ifdef CONFIG_COMPAT_VDSO +static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + return __vdso_remap(ARM64_VDSO32, sm, new_vma); +} +#endif /* CONFIG_COMPAT_VDSO */ + +/* + * aarch32_vdso_pages: + * 0 - kuser helpers + * 1 - sigreturn code + * or (CONFIG_COMPAT_VDSO): + * 0 - kuser helpers + * 1 - vdso data + * 2 - vdso code + */ #define C_VECTORS 0 +#ifdef CONFIG_COMPAT_VDSO +#define C_VVAR 1 +#define C_VDSO 2 +#define C_PAGES (C_VDSO + 1) +#else #define C_SIGPAGE 1 #define C_PAGES (C_SIGPAGE + 1) +#endif /* CONFIG_COMPAT_VDSO */ static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init; -static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { +static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { { .name = "[vectors]", /* ABI */ .pages = &aarch32_vdso_pages[C_VECTORS], }, +#ifdef CONFIG_COMPAT_VDSO + { + .name = "[vvar]", + }, + { + .name = "[vdso]", + .mremap = aarch32_vdso_mremap, + }, +#else { .name = "[sigpage]", /* ABI */ .pages = &aarch32_vdso_pages[C_SIGPAGE], }, +#endif /* CONFIG_COMPAT_VDSO */ }; static int aarch32_alloc_kuser_vdso_page(void) @@ -77,7 +248,33 @@ static int aarch32_alloc_kuser_vdso_page(void) return 0; } -static int __init aarch32_alloc_vdso_pages(void) +#ifdef CONFIG_COMPAT_VDSO +static int __aarch32_alloc_vdso_pages(void) +{ + int ret; + + vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR]; + vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO]; + + ret = __vdso_init(ARM64_VDSO32); + if (ret) + return ret; + + ret = aarch32_alloc_kuser_vdso_page(); + if (ret) { + unsigned long c_vvar = + (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]); + unsigned long c_vdso = + (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]); + + free_page(c_vvar); + free_page(c_vdso); + } + + return ret; +} +#else +static int __aarch32_alloc_vdso_pages(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; @@ -98,6 +295,12 @@ static int __init aarch32_alloc_vdso_pages(void) return ret; } +#endif /* CONFIG_COMPAT_VDSO */ + 
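
As context for the [vvar]/[vdso] special mappings installed by the code above (this is not part of the patch itself): a userspace process locates its vDSO through the AT_SYSINFO_EHDR auxiliary-vector entry, which is how the C library resolves symbols such as __kernel_clock_gettime without issuing a system call. A minimal sketch, assuming a Linux process using glibc's getauxval(); the variable names are illustrative only:

#include <elf.h>
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	/* Base address of the [vdso] mapping provided by the kernel. */
	unsigned long base = getauxval(AT_SYSINFO_EHDR);

	if (!base) {
		puts("no vDSO mapped");
		return 1;
	}

	/*
	 * The mapping begins with an ordinary ELF header (Elf32_Ehdr for an
	 * AArch32 compat task); libc walks its dynamic symbol table to find
	 * the __kernel_* / __vdso_* entry points.
	 */
	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)base;
	printf("vDSO at %#lx, ELF class %d\n", base,
	       (int)ehdr->e_ident[EI_CLASS]);
	return 0;
}
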
+static int __init aarch32_alloc_vdso_pages(void) +{ + return __aarch32_alloc_vdso_pages(); +} arch_initcall(aarch32_alloc_vdso_pages); static int aarch32_kuser_helpers_setup(struct mm_struct *mm) @@ -119,6 +322,7 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) return PTR_ERR_OR_ZERO(ret); } +#ifndef CONFIG_COMPAT_VDSO static int aarch32_sigreturn_setup(struct mm_struct *mm) { unsigned long addr; @@ -146,6 +350,7 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) out: return PTR_ERR_OR_ZERO(ret); } +#endif /* !CONFIG_COMPAT_VDSO */ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { @@ -159,7 +364,14 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (ret) goto out; +#ifdef CONFIG_COMPAT_VDSO + ret = __setup_additional_pages(ARM64_VDSO32, + mm, + bprm, + uses_interp); +#else ret = aarch32_sigreturn_setup(mm); +#endif /* CONFIG_COMPAT_VDSO */ out: up_write(&mm->mmap_sem); @@ -170,18 +382,18 @@ out: static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - unsigned long new_size = new_vma->vm_end - new_vma->vm_start; - unsigned long vdso_size = vdso_end - vdso_start; - - if (vdso_size != new_size) - return -EINVAL; - - current->mm->context.vdso = (void *)new_vma->vm_start; - - return 0; + return __vdso_remap(ARM64_VDSO, sm, new_vma); } -static struct vm_special_mapping vdso_spec[2] __ro_after_init = { +/* + * aarch64_vdso_pages: + * 0 - vvar + * 1 - vdso + */ +#define A_VVAR 0 +#define A_VDSO 1 +#define A_PAGES (A_VDSO + 1) +static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { { .name = "[vvar]", }, @@ -193,37 +405,10 @@ static struct vm_special_mapping vdso_spec[2] __ro_after_init = { static int __init vdso_init(void) { - int i; - struct page **vdso_pagelist; - unsigned long pfn; - - if (memcmp(vdso_start, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); - return -EINVAL; - } - - vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; - - /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), - GFP_KERNEL); - if (vdso_pagelist == NULL) - return -ENOMEM; - - /* Grab the vDSO data page. */ - vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); - - - /* Grab the vDSO code pages. 
*/ - pfn = sym_to_pfn(vdso_start); - - for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i + 1] = pfn_to_page(pfn + i); + vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR]; + vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO]; - vdso_spec[0].pages = &vdso_pagelist[0]; - vdso_spec[1].pages = &vdso_pagelist[1]; - - return 0; + return __vdso_init(ARM64_VDSO); } arch_initcall(vdso_init); @@ -231,84 +416,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_text_len, vdso_mapping_len; - void *ret; - - vdso_text_len = vdso_pages << PAGE_SHIFT; - /* Be sure to map the data page */ - vdso_mapping_len = vdso_text_len + PAGE_SIZE; + int ret; if (down_write_killable(&mm->mmap_sem)) return -EINTR; - vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); - if (IS_ERR_VALUE(vdso_base)) { - ret = ERR_PTR(vdso_base); - goto up_fail; - } - ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, - &vdso_spec[0]); - if (IS_ERR(ret)) - goto up_fail; - - vdso_base += PAGE_SIZE; - mm->context.vdso = (void *)vdso_base; - ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - &vdso_spec[1]); - if (IS_ERR(ret)) - goto up_fail; + ret = __setup_additional_pages(ARM64_VDSO, + mm, + bprm, + uses_interp); up_write(&mm->mmap_sem); - return 0; - -up_fail: - mm->context.vdso = NULL; - up_write(&mm->mmap_sem); - return PTR_ERR(ret); -} -/* - * Update the vDSO data page to keep in sync with kernel timekeeping. - */ -void update_vsyscall(struct timekeeper *tk) -{ - u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; - - ++vdso_data->tb_seq_count; - smp_wmb(); - - vdso_data->use_syscall = use_syscall; - vdso_data->xtime_coarse_sec = tk->xtime_sec; - vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >> - tk->tkr_mono.shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; - - /* Read without the seqlock held by clock_getres() */ - WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution); - - if (!use_syscall) { - /* tkr_mono.cycle_last == tkr_raw.cycle_last */ - vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; - vdso_data->raw_time_sec = tk->raw_sec; - vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec; - vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mono_mult = tk->tkr_mono.mult; - vdso_data->cs_raw_mult = tk->tkr_raw.mult; - /* tkr_mono.shift == tkr_raw.shift */ - vdso_data->cs_shift = tk->tkr_mono.shift; - } - - smp_wmb(); - ++vdso_data->tb_seq_count; -} - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; + return ret; } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index fa230ff09aa1..4ab863045188 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -6,7 +6,12 @@ # Heavily based on the vDSO Makefiles for other archs. # -obj-vdso := gettimeofday.o note.o sigreturn.o +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. 
+ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64 +include $(srctree)/lib/vdso/Makefile + +obj-vdso := vgettimeofday.o note.o sigreturn.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg @@ -15,6 +20,31 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ --build-id -n -T +ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Bsymbolic + +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +KBUILD_CFLAGS += $(DISABLE_LTO) +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +OBJECT_FILES_NON_STANDARD := y +KCOV_INSTRUMENT := n + +ifeq ($(c-gettimeofday-y),) +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny +else +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y) +endif + +# Clang versions less than 8 do not support -mcmodel=tiny +ifeq ($(CONFIG_CC_IS_CLANG), y) + ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0) + CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny + endif +endif + # Disable gcov profiling for VDSO code GCOV_PROFILE := n @@ -28,6 +58,7 @@ $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,ld) + $(call if_changed,vdso_check) # Strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -42,13 +73,9 @@ quiet_cmd_vdsosym = VDSOSYM $@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) -# Assembly rules for the .S files -$(obj-vdso): %.o: %.S FORCE - $(call if_changed_dep,vdsoas) - # Actual build commands -quiet_cmd_vdsoas = VDSOA $@ - cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdsocc = VDSOCC $@ + cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $< # Install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index 80f780f56e0d..e69de29bb2d1 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -1,323 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Userspace implementations of gettimeofday() and friends. - * - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon <will.deacon@arm.com> - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - -#define NSEC_PER_SEC_LO16 0xca00 -#define NSEC_PER_SEC_HI16 0x3b9a - -vdso_data .req x6 -seqcnt .req w7 -w_tmp .req w8 -x_tmp .req x8 - -/* - * Conventions for macro arguments: - * - An argument is write-only if its name starts with "res". - * - All other arguments are read-only, unless otherwise specified. - */ - - .macro seqcnt_acquire -9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] - tbnz seqcnt, #0, 9999b - dmb ishld - .endm - - .macro seqcnt_check fail - dmb ishld - ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] - cmp w_tmp, seqcnt - b.ne \fail - .endm - - .macro syscall_check fail - ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] - cbnz w_tmp, \fail - .endm - - .macro get_nsec_per_sec res - mov \res, #NSEC_PER_SEC_LO16 - movk \res, #NSEC_PER_SEC_HI16, lsl #16 - .endm - - /* - * Returns the clock delta, in nanoseconds left-shifted by the clock - * shift. - */ - .macro get_clock_shifted_nsec res, cycle_last, mult - /* Read the virtual counter. */ - isb - mrs x_tmp, cntvct_el0 - /* Calculate cycle delta and convert to ns. 
*/ - sub \res, x_tmp, \cycle_last - /* We can only guarantee 56 bits of precision. */ - movn x_tmp, #0xff00, lsl #48 - and \res, x_tmp, \res - mul \res, \res, \mult - /* - * Fake address dependency from the value computed from the counter - * register to subsequent data page accesses so that the sequence - * locking also orders the read of the counter. - */ - and x_tmp, \res, xzr - add vdso_data, vdso_data, x_tmp - .endm - - /* - * Returns in res_{sec,nsec} the REALTIME timespec, based on the - * "wall time" (xtime) and the clock_mono delta. - */ - .macro get_ts_realtime res_sec, res_nsec, \ - clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec - add \res_nsec, \clock_nsec, \xtime_nsec - udiv x_tmp, \res_nsec, \nsec_to_sec - add \res_sec, \xtime_sec, x_tmp - msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec - .endm - - /* - * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, - * used for CLOCK_MONOTONIC_RAW. - */ - .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec - udiv \res_sec, \clock_nsec, \nsec_to_sec - msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec - .endm - - /* sec and nsec are modified in place. */ - .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec - /* Add timespec. */ - add \sec, \sec, \ts_sec - add \nsec, \nsec, \ts_nsec - - /* Normalise the new timespec. */ - cmp \nsec, \nsec_to_sec - b.lt 9999f - sub \nsec, \nsec, \nsec_to_sec - add \sec, \sec, #1 -9999: - cmp \nsec, #0 - b.ge 9998f - add \nsec, \nsec, \nsec_to_sec - sub \sec, \sec, #1 -9998: - .endm - - .macro clock_gettime_return, shift=0 - .if \shift == 1 - lsr x11, x11, x12 - .endif - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret - .endm - - .macro jump_slot jumptable, index, label - .if (. - \jumptable) != 4 * (\index) - .error "Jump slot index mismatch" - .endif - b \label - .endm - - .text - -/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ -ENTRY(__kernel_gettimeofday) - .cfi_startproc - adr vdso_data, _vdso_data - /* If tv is NULL, skip to the timezone code. */ - cbz x0, 2f - - /* Compute the time of day. */ -1: seqcnt_acquire - syscall_check fail=4f - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=1b - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - - /* Convert ns to us. */ - mov x13, #1000 - lsl x13, x13, x12 - udiv x11, x11, x13 - stp x10, x11, [x0, #TVAL_TV_SEC] -2: - /* If tz is NULL, return 0. */ - cbz x1, 3f - ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST] - stp w4, w5, [x1, #TZ_MINWEST] -3: - mov x0, xzr - ret -4: - /* Syscall fallback. 
*/ - mov x8, #__NR_gettimeofday - svc #0 - ret - .cfi_endproc -ENDPROC(__kernel_gettimeofday) - -#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE - -/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ -ENTRY(__kernel_clock_gettime) - .cfi_startproc - cmp w0, #JUMPSLOT_MAX - b.hi syscall - adr vdso_data, _vdso_data - adr x_tmp, jumptable - add x_tmp, x_tmp, w0, uxtw #2 - br x_tmp - - ALIGN -jumptable: - jump_slot jumptable, CLOCK_REALTIME, realtime - jump_slot jumptable, CLOCK_MONOTONIC, monotonic - b syscall - b syscall - jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw - jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse - jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse - - .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) - .error "Wrong jumptable size" - .endif - - ALIGN -realtime: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - - /* All computations are done with left-shifted nsecs. */ - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=realtime - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -monotonic: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] - - /* All computations are done with left-shifted nsecs. */ - lsl x4, x4, x12 - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=monotonic - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - - add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -monotonic_raw: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_raw_mult, w12 = cs_shift */ - ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] - ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] - - /* All computations are done with left-shifted nsecs. */ - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=monotonic_raw - get_ts_clock_raw res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, nsec_to_sec=x9 - - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -realtime_coarse: - seqcnt_acquire - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] - seqcnt_check fail=realtime_coarse - clock_gettime_return - - ALIGN -monotonic_coarse: - seqcnt_acquire - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] - seqcnt_check fail=monotonic_coarse - - /* Computations are done in (non-shifted) nsecs. */ - get_nsec_per_sec res=x9 - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 - clock_gettime_return - - ALIGN -syscall: /* Syscall fallback. 
*/ - mov x8, #__NR_clock_gettime - svc #0 - ret - .cfi_endproc -ENDPROC(__kernel_clock_gettime) - -/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */ -ENTRY(__kernel_clock_getres) - .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne - b.ne 1f - - adr vdso_data, _vdso_data - ldr w2, [vdso_data, #CLOCK_REALTIME_RES] - b 2f -1: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 4f - ldr x2, 5f -2: - cbz x1, 3f - stp xzr, x2, [x1] - -3: /* res == NULL. */ - mov w0, wzr - ret - -4: /* Syscall fallback. */ - mov x8, #__NR_clock_getres - svc #0 - ret -5: - .quad CLOCK_COARSE_RES - .cfi_endproc -ENDPROC(__kernel_clock_getres) diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c new file mode 100644 index 000000000000..747635501a14 --- /dev/null +++ b/arch/arm64/kernel/vdso/vgettimeofday.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 userspace implementations of gettimeofday() and similar. + * + * Copyright (C) 2018 ARM Limited + * + */ +#include <linux/time.h> +#include <linux/types.h> + +int __kernel_clock_gettime(clockid_t clock, + struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __kernel_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __kernel_clock_getres(clockid_t clock_id, + struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock_id, res); +} diff --git a/arch/arm64/kernel/vdso32/.gitignore b/arch/arm64/kernel/vdso32/.gitignore new file mode 100644 index 000000000000..4fea950fa5ed --- /dev/null +++ b/arch/arm64/kernel/vdso32/.gitignore @@ -0,0 +1,2 @@ +vdso.lds +vdso.so.raw diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile new file mode 100644 index 000000000000..288c14d30b45 --- /dev/null +++ b/arch/arm64/kernel/vdso32/Makefile @@ -0,0 +1,186 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for vdso32 +# + +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32 +include $(srctree)/lib/vdso/Makefile + +COMPATCC := $(CROSS_COMPILE_COMPAT)gcc + +# Same as cc-*option, but using COMPATCC instead of CC +cc32-option = $(call try-run,\ + $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) +cc32-disable-warning = $(call try-run,\ + $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) +cc32-ldoption = $(call try-run,\ + $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) + +# We cannot use the global flags to compile the vDSO files, the main reason +# being that the 32-bit compiler may be older than the main (64-bit) compiler +# and therefore may not understand flags set using $(cc-option ...). Besides, +# arch-specific options should be taken from the arm Makefile instead of the +# arm64 one. +# As a result we set our own flags here. 
+ +# From top-level Makefile +# NOSTDINC_FLAGS +VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include) +VDSO_CPPFLAGS += $(LINUXINCLUDE) +VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS) + +# Common C and assembly flags +# From top-level Makefile +VDSO_CAFLAGS := $(VDSO_CPPFLAGS) +VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) +ifdef CONFIG_DEBUG_INFO +VDSO_CAFLAGS += -g +endif +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y) +VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO +endif + +# From arm Makefile +VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm) +VDSO_CAFLAGS += -mabi=aapcs-linux -mfloat-abi=soft +ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) +VDSO_CAFLAGS += -mbig-endian +else +VDSO_CAFLAGS += -mlittle-endian +endif + +# From arm vDSO Makefile +VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector +VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING + +# Try to compile for ARMv8. If the compiler is too old and doesn't support it, +# fall back to v7. There is no easy way to check for what architecture the code +# is being compiled, so define a macro specifying that (see arch/arm/Makefile). +VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\ + -march=armv7-a -D__LINUX_ARM_ARCH__=7) + +VDSO_CFLAGS := $(VDSO_CAFLAGS) +VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1 +# KBUILD_CFLAGS from top-level Makefile +VDSO_CFLAGS += -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ + -fno-strict-aliasing -fno-common \ + -Werror-implicit-function-declaration \ + -Wno-format-security \ + -std=gnu89 +VDSO_CFLAGS += -O2 +# Some useful compiler-dependent flags from top-level Makefile +VDSO_CFLAGS += $(call cc32-option,-Wdeclaration-after-statement,) +VDSO_CFLAGS += $(call cc32-option,-Wno-pointer-sign) +VDSO_CFLAGS += $(call cc32-option,-fno-strict-overflow) +VDSO_CFLAGS += $(call cc32-option,-Werror=strict-prototypes) +VDSO_CFLAGS += $(call cc32-option,-Werror=date-time) +VDSO_CFLAGS += $(call cc32-option,-Werror=incompatible-pointer-types) + +# The 32-bit compiler does not provide 128-bit integers, which are used in +# some headers that are indirectly included from the vDSO code. +# This hack makes the compiler happy and should trigger a warning/error if +# variables of such type are referenced. 
+VDSO_CFLAGS += -D__uint128_t='void*' +# Silence some warnings coming from headers that operate on long's +# (on GCC 4.8 or older, there is unfortunately no way to silence this warning) +VDSO_CFLAGS += $(call cc32-disable-warning,shift-count-overflow) +VDSO_CFLAGS += -Wno-int-to-pointer-cast + +VDSO_AFLAGS := $(VDSO_CAFLAGS) +VDSO_AFLAGS += -D__ASSEMBLY__ + +VDSO_LDFLAGS := $(VDSO_CPPFLAGS) +# From arm vDSO Makefile +VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 +VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft +VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id) +VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) + + +# Borrow vdsomunge.c from the arm vDSO +# We have to use a relative path because scripts/Makefile.host prefixes +# $(hostprogs-y) with $(obj) +munge := ../../../arm/vdso/vdsomunge +hostprogs-y := $(munge) + +c-obj-vdso := note.o +c-obj-vdso-gettimeofday := vgettimeofday.o +asm-obj-vdso := sigreturn.o + +ifneq ($(c-gettimeofday-y),) +VDSO_CFLAGS_gettimeofday_o += -include $(c-gettimeofday-y) +endif + +VDSO_CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os + +# Build rules +targets := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) vdso.so vdso.so.dbg vdso.so.raw +c-obj-vdso := $(addprefix $(obj)/, $(c-obj-vdso)) +c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday)) +asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso)) +obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) + +obj-y += vdso.o +extra-y += vdso.lds +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) + +# Force dependency (vdso.s includes vdso.so through incbin) +$(obj)/vdso.o: $(obj)/vdso.so + +include/generated/vdso32-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +# Strip rule for vdso.so +$(obj)/vdso.so: OBJCOPYFLAGS := -S +$(obj)/vdso.so: $(obj)/vdso.so.dbg FORCE + $(call if_changed,objcopy) + +$(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE + $(call if_changed,vdsomunge) + +# Link rule for the .so file, .lds has to be first +$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold) + $(call if_changed,vdso_check) + +# Compilation rules for the vDSO sources +$(c-obj-vdso): %.o: %.c FORCE + $(call if_changed_dep,vdsocc) +$(c-obj-vdso-gettimeofday): %.o: %.c FORCE + $(call if_changed_dep,vdsocc_gettimeofday) +$(asm-obj-vdso): %.o: %.S FORCE + $(call if_changed_dep,vdsoas) + +# Actual build commands +quiet_cmd_vdsold = VDSOL $@ + cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ + -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ +quiet_cmd_vdsocc = VDSOC $@ + cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< +quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@ + cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $< +quiet_cmd_vdsoas = VDSOA $@ + cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $< + +quiet_cmd_vdsomunge = MUNGE $@ + cmd_vdsomunge = $(obj)/$(munge) $< $@ + +# Generate vDSO offsets using helper script (borrowed from the 64-bit vDSO) +gen-vdsosym := $(srctree)/$(src)/../vdso/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ +# The AArch64 nm should be able to read an AArch32 binary + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +# Install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL 
$@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/vdso32.so + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/arm64/kernel/vdso32/note.c b/arch/arm64/kernel/vdso32/note.c new file mode 100644 index 000000000000..eff5bf9efb8b --- /dev/null +++ b/arch/arm64/kernel/vdso32/note.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2012-2018 ARM Limited + * + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> +#include <linux/build-salt.h> + +ELFNOTE32("Linux", 0, LINUX_VERSION_CODE); +BUILD_SALT; diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S new file mode 100644 index 000000000000..1a81277c2d09 --- /dev/null +++ b/arch/arm64/kernel/vdso32/sigreturn.S @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file provides both A32 and T32 versions, in accordance with the + * arm sigreturn code. + * + * Copyright (C) 2018 ARM Limited + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +#define ARM_ENTRY(name) \ + ENTRY(name) + +#define ARM_ENDPROC(name) \ + .type name, %function; \ + END(name) + + .text + + .arm + .fnstart + .save {r0-r15} + .pad #COMPAT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_sigreturn_arm) + mov r7, #__NR_compat_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_sigreturn_arm) + + .fnstart + .save {r0-r15} + .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_rt_sigreturn_arm) + mov r7, #__NR_compat_rt_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_rt_sigreturn_arm) + + .thumb + .fnstart + .save {r0-r15} + .pad #COMPAT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_sigreturn_thumb) + mov r7, #__NR_compat_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_sigreturn_thumb) + + .fnstart + .save {r0-r15} + .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET + nop +ARM_ENTRY(__kernel_rt_sigreturn_thumb) + mov r7, #__NR_compat_rt_sigreturn + svc #0 + .fnend +ARM_ENDPROC(__kernel_rt_sigreturn_thumb) diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S new file mode 100644 index 000000000000..e72ac7bc4c04 --- /dev/null +++ b/arch/arm64/kernel/vdso32/vdso.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Limited + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/page.h> + + .globl vdso32_start, vdso32_end + .section .rodata + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/arm64/kernel/vdso32/vdso.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/arm64/kernel/vdso32/vdso.lds.S b/arch/arm64/kernel/vdso32/vdso.lds.S new file mode 100644 index 000000000000..a3944927eaeb --- /dev/null +++ b/arch/arm64/kernel/vdso32/vdso.lds.S @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Adapted from arm64 version. + * + * GNU linker script for the VDSO library. + * Heavily based on the vDSO linker scripts for other archs. + * + * Copyright (C) 2012-2018 ARM Limited + */ + +#include <linux/const.h> +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-littlearm", "elf32-bigarm", "elf32-littlearm") +OUTPUT_ARCH(arm) + +SECTIONS +{ + PROVIDE_HIDDEN(_vdso_data = . - PAGE_SIZE); + . 
= VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { *(.rodata*) } :text + + .text : { *(.text*) } :text =0xe7f001f2 + + .got : { *(.got) } + .rel.plt : { *(.rel.plt) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ +} + +VERSION +{ + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_clock_getres; + __kernel_sigreturn_arm; + __kernel_sigreturn_thumb; + __kernel_rt_sigreturn_arm; + __kernel_rt_sigreturn_thumb; + __vdso_clock_gettime64; + local: *; + }; +} + +/* + * Make the sigreturn code visible to the kernel. + */ +VDSO_compat_sigreturn_arm = __kernel_sigreturn_arm; +VDSO_compat_sigreturn_thumb = __kernel_sigreturn_thumb; +VDSO_compat_rt_sigreturn_arm = __kernel_rt_sigreturn_arm; +VDSO_compat_rt_sigreturn_thumb = __kernel_rt_sigreturn_thumb; diff --git a/arch/arm64/kernel/vdso32/vgettimeofday.c b/arch/arm64/kernel/vdso32/vgettimeofday.c new file mode 100644 index 000000000000..54fc1c2ce93f --- /dev/null +++ b/arch/arm64/kernel/vdso32/vgettimeofday.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 compat userspace implementations of gettimeofday() and similar. 
+ * + * Copyright (C) 2018 ARM Limited + * + */ +#include <linux/time.h> +#include <linux/types.h> + +int __vdso_clock_gettime(clockid_t clock, + struct old_timespec32 *ts) +{ + /* The checks below are required for ABI consistency with arm */ + if ((u32)ts >= TASK_SIZE_32) + return -EFAULT; + + return __cvdso_clock_gettime32(clock, ts); +} + +int __vdso_clock_gettime64(clockid_t clock, + struct __kernel_timespec *ts) +{ + /* The checks below are required for ABI consistency with arm */ + if ((u32)ts >= TASK_SIZE_32) + return -EFAULT; + + return __cvdso_clock_gettime(clock, ts); +} + +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __vdso_clock_getres(clockid_t clock_id, + struct old_timespec32 *res) +{ + /* The checks below are required for ABI consistency with arm */ + if ((u32)res >= TASK_SIZE_32) + return -EFAULT; + + return __cvdso_clock_getres_time32(clock_id, res); +} + +/* Avoid unresolved references emitted by GCC */ + +void __aeabi_unwind_cpp_pr0(void) +{ +} + +void __aeabi_unwind_cpp_pr1(void) +{ +} + +void __aeabi_unwind_cpp_pr2(void) +{ +} diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 6e3c9c8b2df9..525010504f9d 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -112,9 +112,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { u64 *guest_zcr = &vcpu->arch.ctxt.sys_regs[ZCR_EL1]; - /* Clean guest FP state to memory and invalidate cpu view */ - fpsimd_save(); - fpsimd_flush_cpu_state(); + fpsimd_save_and_flush_cpu_state(); if (guest_has_sve) *guest_zcr = read_sysreg_s(SYS_ZCR_EL12); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index c2afa7982047..dfd626447482 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -208,7 +208,7 @@ out: #define vq_word(vq) (((vq) - SVE_VQ_MIN) / 64) #define vq_mask(vq) ((u64)1 << ((vq) - SVE_VQ_MIN) % 64) -#define vq_present(vqs, vq) ((vqs)[vq_word(vq)] & vq_mask(vq)) +#define vq_present(vqs, vq) (!!((vqs)[vq_word(vq)] & vq_mask(vq))) static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index bd34016354ba..e5cc8d66bf53 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -6,6 +6,7 @@ #include <linux/linkage.h> +#include <asm/alternative.h> #include <asm/asm-offsets.h> #include <asm/assembler.h> #include <asm/fpsimdmacros.h> @@ -52,6 +53,20 @@ ENTRY(__guest_enter) // Store the host regs save_callee_saved_regs x1 + // Now the host state is stored if we have a pending RAS SError it must + // affect the host. If any asynchronous exception is pending we defer + // the guest entry. The DSB isn't necessary before v8.2 as any SError + // would be fatal. +alternative_if ARM64_HAS_RAS_EXTN + dsb nshst + isb +alternative_else_nop_endif + mrs x1, isr_el1 + cbz x1, 1f + mov x0, #ARM_EXCEPTION_IRQ + ret + +1: add x18, x0, #VCPU_CONTEXT // Macro ptrauth_switch_to_guest format: @@ -127,8 +142,8 @@ ENTRY(__guest_exit) alternative_if ARM64_HAS_RAS_EXTN // If we have the RAS extensions we can consume a pending error - // without an unmask-SError and isb. - esb + // without an unmask-SError and isb. The ESB-instruction consumed any + // pending guest error when we took the exception from the guest. 
mrs_s x2, SYS_DISR_EL1 str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)] cbz x2, 1f @@ -136,8 +151,16 @@ alternative_if ARM64_HAS_RAS_EXTN orr x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT) 1: ret alternative_else - // If we have a pending asynchronous abort, now is the - // time to find out. From your VAXorcist book, page 666: + dsb sy // Synchronize against in-flight ld/st + isb // Prevent an early read of side-effect free ISR + mrs x2, isr_el1 + tbnz x2, #8, 2f // ISR_EL1.A + ret + nop +2: +alternative_endif + // We know we have a pending asynchronous abort, now is the + // time to flush it out. From your VAXorcist book, page 666: // "Threaten me not, oh Evil one! For I speak with // the power of DEC, and I command thee to show thyself!" mrs x2, elr_el2 @@ -145,10 +168,7 @@ alternative_else mrs x4, spsr_el2 mov x5, x0 - dsb sy // Synchronize against in-flight ld/st - nop msr daifclr, #4 // Unmask aborts -alternative_endif // This is our single instruction exception window. A pending // SError is guaranteed to occur at the earliest when we unmask @@ -161,6 +181,8 @@ abort_guest_exit_start: .global abort_guest_exit_end abort_guest_exit_end: + msr daifset, #4 // Mask aborts + // If the exception took place, restore the EL1 exception // context so that we can report some information. // Merge the exception code with the SError pending bit. diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index b8e045615961..ffa68d5713f1 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -216,17 +216,34 @@ ENDPROC(\label) .align 11 +.macro check_preamble_length start, end +/* kvm_patch_vector_branch() generates code that jumps over the preamble. */ +.if ((\end-\start) != KVM_VECTOR_PREAMBLE) + .error "KVM vector preamble length mismatch" +.endif +.endm + .macro valid_vect target .align 7 +661: + esb stp x0, x1, [sp, #-16]! +662: b \target + +check_preamble_length 661b, 662b .endm .macro invalid_vect target .align 7 +661: b \target + nop +662: ldp x0, x1, [sp], #16 b \target + +check_preamble_length 661b, 662b .endm ENTRY(__kvm_hyp_vector) @@ -254,13 +271,14 @@ ENDPROC(__kvm_hyp_vector) #ifdef CONFIG_KVM_INDIRECT_VECTORS .macro hyp_ventry .align 7 -1: .rept 27 +1: esb + .rept 26 nop .endr /* * The default sequence is to directly branch to the KVM vectors, * using the computed offset. This applies for VHE as well as - * !ARM64_HARDEN_EL2_VECTORS. + * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble. * * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced * with: @@ -271,12 +289,13 @@ ENDPROC(__kvm_hyp_vector) * movk x0, #((addr >> 32) & 0xffff), lsl #32 * br x0 * - * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4. + * Where: + * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. * See kvm_patch_vector_branch for details. */ alternative_cb kvm_patch_vector_branch - b __kvm_hyp_vector + (1b - 0b) - nop + stp x0, x1, [sp, #-16]! 
+ b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) nop nop nop @@ -301,6 +320,7 @@ ENTRY(__bp_harden_hyp_vecs_end) .popsection ENTRY(__smccc_workaround_1_smc_start) + esb sub sp, sp, #(8 * 4) stp x2, x3, [sp, #(8 * 0)] stp x0, x1, [sp, #(8 * 2)] diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b0041812bca9..adaf266d8de8 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -284,7 +284,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) return true; - far = read_sysreg_el2(far); + far = read_sysreg_el2(SYS_FAR); /* * The HPFAR can be invalid if the stage 2 fault did not @@ -401,7 +401,7 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) - vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr); + vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); /* * We're using the raw exception code in order to only process @@ -604,7 +604,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) * Naturally, we want to avoid this. */ if (system_uses_irq_prio_masking()) { - gic_write_pmr(GIC_PRIO_IRQON); + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); dsb(sy); } @@ -697,8 +697,8 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par, asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); __hyp_do_panic(str_va, - spsr, elr, - read_sysreg(esr_el2), read_sysreg_el2(far), + spsr, elr, + read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR), read_sysreg(hpfar_el2), par, vcpu); } @@ -713,15 +713,15 @@ static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par, panic(__hyp_panic_string, spsr, elr, - read_sysreg_el2(esr), read_sysreg_el2(far), + read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR), read_sysreg(hpfar_el2), par, vcpu); } NOKPROBE_SYMBOL(__hyp_call_panic_vhe); void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt) { - u64 spsr = read_sysreg_el2(spsr); - u64 elr = read_sysreg_el2(elr); + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 elr = read_sysreg_el2(SYS_ELR); u64 par = read_sysreg(par_el1); if (!has_vhe()) diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index c283f7cbc702..7ddbc849b580 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -43,33 +43,33 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); - ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); + ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR); ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); - ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr); - ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0); - ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1); - ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr); - ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr); - ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0); - ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1); - ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far); - ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair); - ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar); - ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr); - ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair); - ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl); + 
ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR); + ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0); + ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1); + ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR); + ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR); + ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0); + ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1); + ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR); + ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR); + ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR); + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR); + ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR); + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL); ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); - ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr); - ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr); + ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR); + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR); } static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) { - ctxt->gp_regs.regs.pc = read_sysreg_el2(elr); - ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr); + ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR); + ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR); if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2); @@ -109,35 +109,35 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) { - write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); - write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); } static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr); - write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); - write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr); - write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0); - write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr); - write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr); - write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0); - write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1); - write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far); - write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair); - write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar); - write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr); - write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair); - write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl); + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); + write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); + write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); + write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); + write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); + write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); + write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR); + write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR); + 
write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR); + write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR); + write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR); + write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL); write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); - write_sysreg_el1(ctxt->gp_regs.elr_el1, elr); - write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr); + write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); + write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); } static void __hyp_text @@ -160,8 +160,8 @@ __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) pstate = PSR_MODE_EL2h | PSR_IL_BIT; - write_sysreg_el2(ctxt->gp_regs.regs.pc, elr); - write_sysreg_el2(pstate, spsr); + write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR); + write_sysreg_el2(pstate, SYS_SPSR); if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2); diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 32078b767f63..d49a14497715 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -33,12 +33,12 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, * in the TCR_EL1 register. We also need to prevent it to * allocate IPA->PA walks, so we enable the S1 MMU... */ - val = cxt->tcr = read_sysreg_el1(tcr); + val = cxt->tcr = read_sysreg_el1(SYS_TCR); val |= TCR_EPD1_MASK | TCR_EPD0_MASK; - write_sysreg_el1(val, tcr); - val = cxt->sctlr = read_sysreg_el1(sctlr); + write_sysreg_el1(val, SYS_TCR); + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); val |= SCTLR_ELx_M; - write_sysreg_el1(val, sctlr); + write_sysreg_el1(val, SYS_SCTLR); } /* @@ -85,8 +85,8 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) { /* Restore the registers to what they were */ - write_sysreg_el1(cxt->tcr, tcr); - write_sysreg_el1(cxt->sctlr, sctlr); + write_sysreg_el1(cxt->tcr, SYS_TCR); + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); } local_irq_restore(cxt->flags); diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index ba2aaeb84c6c..29ee1feba4eb 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -16,7 +16,7 @@ static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) - return !!(read_sysreg_el2(spsr) & PSR_AA32_E_BIT); + return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT); return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE); } diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c index d66613e6ad08..0d60e4f0af66 100644 --- a/arch/arm64/kvm/regmap.c +++ b/arch/arm64/kvm/regmap.c @@ -152,7 +152,7 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) switch (spsr_idx) { case KVM_SPSR_SVC: - return read_sysreg_el1(spsr); + return read_sysreg_el1(SYS_SPSR); case KVM_SPSR_ABT: return read_sysreg(spsr_abt); case KVM_SPSR_UND: @@ -177,7 +177,7 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) switch (spsr_idx) { case KVM_SPSR_SVC: - write_sysreg_el1(v, spsr); + write_sysreg_el1(v, SYS_SPSR); case KVM_SPSR_ABT: write_sysreg(v, spsr_abt); case KVM_SPSR_UND: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ce933f296049..f26e181d881c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -81,24 +81,24 @@ u64 
vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) */ switch (reg) { case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); - case SCTLR_EL1: return read_sysreg_s(sctlr_EL12); + case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12); case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); - case CPACR_EL1: return read_sysreg_s(cpacr_EL12); - case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12); - case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12); - case TCR_EL1: return read_sysreg_s(tcr_EL12); - case ESR_EL1: return read_sysreg_s(esr_EL12); - case AFSR0_EL1: return read_sysreg_s(afsr0_EL12); - case AFSR1_EL1: return read_sysreg_s(afsr1_EL12); - case FAR_EL1: return read_sysreg_s(far_EL12); - case MAIR_EL1: return read_sysreg_s(mair_EL12); - case VBAR_EL1: return read_sysreg_s(vbar_EL12); - case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12); + case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12); + case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12); + case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12); + case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12); + case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12); + case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12); + case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12); + case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12); + case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12); + case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12); + case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12); case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); - case AMAIR_EL1: return read_sysreg_s(amair_EL12); - case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12); + case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12); + case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12); case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); @@ -124,24 +124,24 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) */ switch (reg) { case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; - case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return; case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; - case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return; - case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return; - case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return; - case TCR_EL1: write_sysreg_s(val, tcr_EL12); return; - case ESR_EL1: write_sysreg_s(val, esr_EL12); return; - case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return; - case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return; - case FAR_EL1: write_sysreg_s(val, far_EL12); return; - case MAIR_EL1: write_sysreg_s(val, mair_EL12); return; - case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return; - case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); 
return; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return; case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; - case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return; - case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return; case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; @@ -865,12 +865,12 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (r->Op2 & 0x1) { /* accessing PMCNTENSET_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val; - kvm_pmu_enable_counter(vcpu, val); + kvm_pmu_enable_counter_mask(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); } else { /* accessing PMCNTENCLR_EL0 */ __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val; - kvm_pmu_disable_counter(vcpu, val); + kvm_pmu_disable_counter_mask(vcpu, val); } } else { p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask; diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index 2947ab1b0fa5..acd8084f1f2c 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -170,11 +170,10 @@ void kvm_patch_vector_branch(struct alt_instr *alt, addr |= ((u64)origptr & GENMASK_ULL(10, 7)); /* - * Branch to the second instruction in the vectors in order to - * avoid the initial store on the stack (which we already - * perform in the hardening vectors). + * Branch over the preamble in order to avoid the initial store on + * the stack (which we already perform in the hardening vectors). */ - addr += AARCH64_INSN_SIZE; + addr += KVM_VECTOR_PREAMBLE; /* stp x0, x1, [sp, #-16]! */ insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0, diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 5992eb9a9a08..1d3f0b5a9940 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -1,24 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * SWIOTLB-based DMA API implementation - * * Copyright (C) 2012 ARM Ltd. 
* Author: Catalin Marinas <catalin.marinas@arm.com> */ #include <linux/gfp.h> -#include <linux/acpi.h> -#include <linux/memblock.h> #include <linux/cache.h> -#include <linux/export.h> -#include <linux/slab.h> -#include <linux/genalloc.h> -#include <linux/dma-direct.h> #include <linux/dma-noncoherent.h> -#include <linux/dma-contiguous.h> -#include <linux/vmalloc.h> -#include <linux/swiotlb.h> -#include <linux/pci.h> +#include <linux/dma-iommu.h> #include <asm/cacheflush.h> @@ -47,422 +36,33 @@ void arch_dma_prep_coherent(struct page *page, size_t size) __dma_flush_area(page_address(page), size); } -#ifdef CONFIG_IOMMU_DMA -static int __swiotlb_get_sgtable_page(struct sg_table *sgt, - struct page *page, size_t size) -{ - int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - - if (!ret) - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - - return ret; -} - -static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, - unsigned long pfn, size_t size) -{ - int ret = -ENXIO; - unsigned long nr_vma_pages = vma_pages(vma); - unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - - if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - pfn + off, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); - } - - return ret; -} -#endif /* CONFIG_IOMMU_DMA */ - static int __init arm64_dma_init(void) { - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), - TAINT_CPU_OUT_OF_SPEC, - "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", - ARCH_DMA_MINALIGN, cache_line_size()); return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); } arch_initcall(arm64_dma_init); #ifdef CONFIG_IOMMU_DMA -#include <linux/dma-iommu.h> -#include <linux/platform_device.h> -#include <linux/amba/bus.h> - -/* Thankfully, all cache ops are by VA so we can ignore phys here */ -static void flush_page(struct device *dev, const void *virt, phys_addr_t phys) -{ - __dma_flush_area(virt, PAGE_SIZE); -} - -static void *__iommu_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t gfp, - unsigned long attrs) -{ - bool coherent = dev_is_dma_coherent(dev); - int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); - size_t iosize = size; - void *addr; - - if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n")) - return NULL; - - size = PAGE_ALIGN(size); - - /* - * Some drivers rely on this, and we probably don't want the - * possibility of stale kernel data being read by devices anyway. - */ - gfp |= __GFP_ZERO; - - if (!gfpflags_allow_blocking(gfp)) { - struct page *page; - /* - * In atomic context we can't remap anything, so we'll only - * get the virtually contiguous buffer we need by way of a - * physically contiguous allocation. - */ - if (coherent) { - page = alloc_pages(gfp, get_order(size)); - addr = page ? 
page_address(page) : NULL; - } else { - addr = dma_alloc_from_pool(size, &page, gfp); - } - if (!addr) - return NULL; - - *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); - if (*handle == DMA_MAPPING_ERROR) { - if (coherent) - __free_pages(page, get_order(size)); - else - dma_free_from_pool(addr, size); - addr = NULL; - } - } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); - struct page *page; - - page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, - get_order(size), gfp & __GFP_NOWARN); - if (!page) - return NULL; - - *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot); - if (*handle == DMA_MAPPING_ERROR) { - dma_release_from_contiguous(dev, page, - size >> PAGE_SHIFT); - return NULL; - } - addr = dma_common_contiguous_remap(page, size, VM_USERMAP, - prot, - __builtin_return_address(0)); - if (addr) { - if (!coherent) - __dma_flush_area(page_to_virt(page), iosize); - memset(addr, 0, size); - } else { - iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); - dma_release_from_contiguous(dev, page, - size >> PAGE_SHIFT); - } - } else { - pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); - struct page **pages; - - pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, - handle, flush_page); - if (!pages) - return NULL; - - addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot, - __builtin_return_address(0)); - if (!addr) - iommu_dma_free(dev, pages, iosize, handle); - } - return addr; -} - -static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, unsigned long attrs) -{ - size_t iosize = size; - - size = PAGE_ALIGN(size); - /* - * @cpu_addr will be one of 4 things depending on how it was allocated: - * - A remapped array of pages for contiguous allocations. - * - A remapped array of pages from iommu_dma_alloc(), for all - * non-atomic allocations. - * - A non-cacheable alias from the atomic pool, for atomic - * allocations by non-coherent devices. - * - A normal lowmem address, for atomic allocations by - * coherent devices. - * Hence how dodgy the below logic looks... 
- */ - if (dma_in_atomic_pool(cpu_addr, size)) { - iommu_dma_unmap_page(dev, handle, iosize, 0, 0); - dma_free_from_pool(cpu_addr, size); - } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - struct page *page = vmalloc_to_page(cpu_addr); - - iommu_dma_unmap_page(dev, handle, iosize, 0, attrs); - dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); - dma_common_free_remap(cpu_addr, size, VM_USERMAP); - } else if (is_vmalloc_addr(cpu_addr)){ - struct vm_struct *area = find_vm_area(cpu_addr); - - if (WARN_ON(!area || !area->pages)) - return; - iommu_dma_free(dev, area->pages, iosize, &handle); - dma_common_free_remap(cpu_addr, size, VM_USERMAP); - } else { - iommu_dma_unmap_page(dev, handle, iosize, 0, 0); - __free_pages(virt_to_page(cpu_addr), get_order(size)); - } -} - -static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - struct vm_struct *area; - int ret; - - vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (!is_vmalloc_addr(cpu_addr)) { - unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr)); - return __swiotlb_mmap_pfn(vma, pfn, size); - } - - if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - /* - * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, - * hence in the vmalloc space. - */ - unsigned long pfn = vmalloc_to_pfn(cpu_addr); - return __swiotlb_mmap_pfn(vma, pfn, size); - } - - area = find_vm_area(cpu_addr); - if (WARN_ON(!area || !area->pages)) - return -ENXIO; - - return iommu_dma_mmap(area->pages, size, vma); -} - -static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, - size_t size, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct vm_struct *area = find_vm_area(cpu_addr); - - if (!is_vmalloc_addr(cpu_addr)) { - struct page *page = virt_to_page(cpu_addr); - return __swiotlb_get_sgtable_page(sgt, page, size); - } - - if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - /* - * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped, - * hence in the vmalloc space. 
- */ - struct page *page = vmalloc_to_page(cpu_addr); - return __swiotlb_get_sgtable_page(sgt, page, size); - } - - if (WARN_ON(!area || !area->pages)) - return -ENXIO; - - return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size, - GFP_KERNEL); -} - -static void __iommu_sync_single_for_cpu(struct device *dev, - dma_addr_t dev_addr, size_t size, - enum dma_data_direction dir) -{ - phys_addr_t phys; - - if (dev_is_dma_coherent(dev)) - return; - - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); - arch_sync_dma_for_cpu(dev, phys, size, dir); -} - -static void __iommu_sync_single_for_device(struct device *dev, - dma_addr_t dev_addr, size_t size, - enum dma_data_direction dir) -{ - phys_addr_t phys; - - if (dev_is_dma_coherent(dev)) - return; - - phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); - arch_sync_dma_for_device(dev, phys, size, dir); -} - -static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - bool coherent = dev_is_dma_coherent(dev); - int prot = dma_info_to_prot(dir, coherent, attrs); - dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); - - if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - dev_addr != DMA_MAPPING_ERROR) - __dma_map_area(page_address(page) + offset, size, dir); - - return dev_addr; -} - -static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_single_for_cpu(dev, dev_addr, size, dir); - - iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs); -} - -static void __iommu_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - if (dev_is_dma_coherent(dev)) - return; - - for_each_sg(sgl, sg, nelems, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); -} - -static void __iommu_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - if (dev_is_dma_coherent(dev)) - return; - - for_each_sg(sgl, sg, nelems, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); -} - -static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - bool coherent = dev_is_dma_coherent(dev); - - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_sg_for_device(dev, sgl, nelems, dir); - - return iommu_dma_map_sg(dev, sgl, nelems, - dma_info_to_prot(dir, coherent, attrs)); -} - -static void __iommu_unmap_sg_attrs(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, - unsigned long attrs) -{ - if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir); - - iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs); -} - -static const struct dma_map_ops iommu_dma_ops = { - .alloc = __iommu_alloc_attrs, - .free = __iommu_free_attrs, - .mmap = __iommu_mmap_attrs, - .get_sgtable = __iommu_get_sgtable, - .map_page = __iommu_map_page, - .unmap_page = __iommu_unmap_page, - .map_sg = __iommu_map_sg_attrs, - .unmap_sg = __iommu_unmap_sg_attrs, - .sync_single_for_cpu = __iommu_sync_single_for_cpu, - .sync_single_for_device = __iommu_sync_single_for_device, - .sync_sg_for_cpu = __iommu_sync_sg_for_cpu, - .sync_sg_for_device = 
__iommu_sync_sg_for_device, - .map_resource = iommu_dma_map_resource, - .unmap_resource = iommu_dma_unmap_resource, -}; - -static int __init __iommu_dma_init(void) -{ - return iommu_dma_init(); -} -arch_initcall(__iommu_dma_init); - -static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *ops) -{ - struct iommu_domain *domain; - - if (!ops) - return; - - /* - * The IOMMU core code allocates the default DMA domain, which the - * underlying IOMMU driver needs to support via the dma-iommu layer. - */ - domain = iommu_get_domain_for_dev(dev); - - if (!domain) - goto out_err; - - if (domain->type == IOMMU_DOMAIN_DMA) { - if (iommu_dma_init_domain(domain, dma_base, size, dev)) - goto out_err; - - dev->dma_ops = &iommu_dma_ops; - } - - return; - -out_err: - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", - dev_name(dev)); -} - void arch_teardown_dma_ops(struct device *dev) { dev->dma_ops = NULL; } - -#else - -static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu) -{ } - -#endif /* CONFIG_IOMMU_DMA */ +#endif void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { + int cls = cache_line_size_of_cpu(); + + WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN, + TAINT_CPU_OUT_OF_SPEC, + "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", + dev_driver_string(dev), dev_name(dev), + ARCH_DMA_MINALIGN, cls); + dev->dma_coherent = coherent; - __iommu_setup_dma_ops(dev, dma_base, size, iommu); + if (iommu) + iommu_setup_dma_ops(dev, dma_base, size); #ifdef CONFIG_XEN if (xen_initial_domain()) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 2d115016feb4..c8c61b1eb479 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -384,40 +384,31 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re #define VM_FAULT_BADACCESS 0x020000 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr, - unsigned int mm_flags, unsigned long vm_flags, - struct task_struct *tsk) + unsigned int mm_flags, unsigned long vm_flags) { - struct vm_area_struct *vma; - vm_fault_t fault; + struct vm_area_struct *vma = find_vma(mm, addr); - vma = find_vma(mm, addr); - fault = VM_FAULT_BADMAP; if (unlikely(!vma)) - goto out; - if (unlikely(vma->vm_start > addr)) - goto check_stack; + return VM_FAULT_BADMAP; /* * Ok, we have a good vm_area for this memory access, so we can handle * it. */ -good_area: + if (unlikely(vma->vm_start > addr)) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + return VM_FAULT_BADMAP; + if (expand_stack(vma, addr)) + return VM_FAULT_BADMAP; + } + /* * Check that the permissions on the VMA allow for the fault which * occurred. */ - if (!(vma->vm_flags & vm_flags)) { - fault = VM_FAULT_BADACCESS; - goto out; - } - + if (!(vma->vm_flags & vm_flags)) + return VM_FAULT_BADACCESS; return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags); - -check_stack: - if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) - goto good_area; -out: - return fault; } static bool is_el0_instruction_abort(unsigned int esr) @@ -425,12 +416,20 @@ static bool is_el0_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW; } +/* + * Note: not valid for EL1 DC IVAC, but we never use that such that it + * should fault. EL0 cannot issue DC IVAC (undef). 
+ */ +static bool is_write_abort(unsigned int esr) +{ + return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM); +} + static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf; - struct task_struct *tsk; - struct mm_struct *mm; + struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -438,9 +437,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (notify_page_fault(regs, esr)) return 0; - tsk = current; - mm = tsk->mm; - /* * If we're in an interrupt or have no user context, we must not take * the fault. @@ -453,7 +449,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (is_el0_instruction_abort(esr)) { vm_flags = VM_EXEC; - } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { + mm_flags |= FAULT_FLAG_INSTRUCTION; + } else if (is_write_abort(esr)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } @@ -492,12 +489,14 @@ retry: */ might_sleep(); #ifdef CONFIG_DEBUG_VM - if (!user_mode(regs) && !search_exception_tables(regs->pc)) + if (!user_mode(regs) && !search_exception_tables(regs->pc)) { + up_read(&mm->mmap_sem); goto no_context; + } #endif } - fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); + fault = __do_page_fault(mm, addr, mm_flags, vm_flags); major |= fault & VM_FAULT_MAJOR; if (fault & VM_FAULT_RETRY) { @@ -537,11 +536,11 @@ retry: * that point. */ if (major) { - tsk->maj_flt++; + current->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); } else { - tsk->min_flt++; + current->min_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index f475e54fbc43..bbeb6a5a6ba6 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -228,7 +228,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, if (sz == PUD_SIZE) { ptep = (pte_t *)pudp; - } else if (sz == (PAGE_SIZE * CONT_PTES)) { + } else if (sz == (CONT_PTE_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); @@ -246,7 +246,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ptep = huge_pmd_share(mm, addr, pudp); else ptep = (pte_t *)pmd_alloc(mm, pudp, addr); - } else if (sz == (PMD_SIZE * CONT_PMDS)) { + } else if (sz == (CONT_PMD_SIZE)) { pmdp = pmd_alloc(mm, pudp, addr); WARN_ON(addr & (sz - 1)); return (pte_t *)pmdp; @@ -454,9 +454,9 @@ static int __init hugetlbpage_init(void) #ifdef CONFIG_ARM64_4K_PAGES add_huge_page_size(PUD_SIZE); #endif - add_huge_page_size(PMD_SIZE * CONT_PMDS); + add_huge_page_size(CONT_PMD_SIZE); add_huge_page_size(PMD_SIZE); - add_huge_page_size(PAGE_SIZE * CONT_PTES); + add_huge_page_size(CONT_PTE_SIZE); return 0; } @@ -470,9 +470,9 @@ static __init int setup_hugepagesz(char *opt) #ifdef CONFIG_ARM64_4K_PAGES case PUD_SIZE: #endif - case PMD_SIZE * CONT_PMDS: + case CONT_PMD_SIZE: case PMD_SIZE: - case PAGE_SIZE * CONT_PTES: + case CONT_PTE_SIZE: add_huge_page_size(ps); return 1; } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 749c9b269f08..f3c795278def 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -180,8 +180,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; - if (IS_ENABLED(CONFIG_ZONE_DMA32)) - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); +#ifdef CONFIG_ZONE_DMA32 + 
max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); +#endif max_zone_pfns[ZONE_NORMAL] = max; free_area_init_nodes(max_zone_pfns); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e5ae8663f230..1b49c08dfa2b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -362,7 +362,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, static phys_addr_t __pgd_pgtable_alloc(int shift) { - void *ptr = (void *)__get_free_page(PGALLOC_GFP); + void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL); BUG_ON(!ptr); /* Ensure the zeroed page is visible to the page table walker */ @@ -765,7 +765,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } -#endif /* CONFIG_ARM64_64K_PAGES */ +#endif /* !ARM64_SWAPPER_USES_SECTION_MAPS */ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) { @@ -960,32 +960,28 @@ int __init arch_ioremap_pmd_supported(void) int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { - pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | - pgprot_val(mk_sect_prot(prot))); - pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot)); /* Only allow permission changes for now */ if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), pud_val(new_pud))) return 0; - BUG_ON(phys & ~PUD_MASK); + VM_BUG_ON(phys & ~PUD_MASK); set_pud(pudp, new_pud); return 1; } int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { - pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | - pgprot_val(mk_sect_prot(prot))); - pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), mk_pmd_sect_prot(prot)); /* Only allow permission changes for now */ if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), pmd_val(new_pmd))) return 0; - BUG_ON(phys & ~PMD_MASK); + VM_BUG_ON(phys & ~PMD_MASK); set_pmd(pmdp, new_pmd); return 1; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 47b057bfa803..03c53f16ee77 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -19,8 +19,7 @@ struct page_change_data { bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED); -static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) +static int change_page_range(pte_t *ptep, unsigned long addr, void *data) { struct page_change_data *cdata = data; pte_t pte = READ_ONCE(*ptep); @@ -151,17 +150,48 @@ int set_memory_valid(unsigned long addr, int numpages, int enable) __pgprot(PTE_VALID)); } -#ifdef CONFIG_DEBUG_PAGEALLOC +int set_direct_map_invalid_noflush(struct page *page) +{ + struct page_change_data data = { + .set_mask = __pgprot(0), + .clear_mask = __pgprot(PTE_VALID), + }; + + if (!rodata_full) + return 0; + + return apply_to_page_range(&init_mm, + (unsigned long)page_address(page), + PAGE_SIZE, change_page_range, &data); +} + +int set_direct_map_default_noflush(struct page *page) +{ + struct page_change_data data = { + .set_mask = __pgprot(PTE_VALID | PTE_WRITE), + .clear_mask = __pgprot(PTE_RDONLY), + }; + + if (!rodata_full) + return 0; + + return apply_to_page_range(&init_mm, + (unsigned long)page_address(page), + PAGE_SIZE, change_page_range, &data); +} + void __kernel_map_pages(struct page *page, int numpages, int enable) { + if (!debug_pagealloc_enabled() && !rodata_full) + return; + set_memory_valid((unsigned long)page_address(page), numpages, enable); } -#ifdef CONFIG_HIBERNATION + /* - * When built 
with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function - * is used to determine if a linear map page has been marked as not-valid by - * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit. - * This is based on kern_addr_valid(), which almost does what we need. + * This function is used to determine if a linear map page has been marked as + * not-valid. Walk the page table and check the PTE_VALID bit. This is based + * on kern_addr_valid(), which almost does what we need. * * Because this is only called on the kernel linear map, p?d_sect() implies * p?d_present(). When debug_pagealloc is enabled, sections mappings are @@ -175,6 +205,9 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); + if (!debug_pagealloc_enabled() && !rodata_full) + return true; + pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; @@ -196,5 +229,3 @@ bool kernel_page_present(struct page *page) ptep = pte_offset_kernel(pmdp, addr); return pte_valid(READ_ONCE(*ptep)); } -#endif /* CONFIG_HIBERNATION */ -#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 9a0c7d5090d6..7548f9ca1f11 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -19,10 +19,12 @@ static struct kmem_cache *pgd_cache __ro_after_init; pgd_t *pgd_alloc(struct mm_struct *mm) { + gfp_t gfp = GFP_PGTABLE_USER; + if (PGD_SIZE == PAGE_SIZE) - return (pgd_t *)__get_free_page(PGALLOC_GFP); + return (pgd_t *)__get_free_page(gfp); else - return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); + return kmem_cache_alloc(pgd_cache, gfp); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 87c568807925..f5b437f8a22b 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -970,7 +970,7 @@ void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index eeb0471268a0..b4fb61c83494 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -1,12 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. 
# config C6X def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP diff --git a/arch/c6x/include/asm/flat.h b/arch/c6x/include/asm/flat.h index 76fd0bb962a3..9e6544b51386 100644 --- a/arch/c6x/include/asm/flat.h +++ b/arch/c6x/include/asm/flat.h @@ -4,11 +4,8 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 0 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { *addr = get_unaligned((__force u32 *)rp); return 0; @@ -18,7 +15,5 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) put_unaligned(addr, (__force u32 *)rp); return 0; } -#define flat_get_relocate_addr(rel) (rel) -#define flat_set_persistent(relval, p) 0 #endif /* __ASM_C6X_FLAT_H */ diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c index e72d9b6bc234..e456652facce 100644 --- a/arch/c6x/kernel/signal.c +++ b/arch/c6x/kernel/signal.c @@ -90,7 +90,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs) return regs->a4; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index c4785c9b67a2..ec61034fdf56 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -250,7 +250,7 @@ static void do_trap(struct exception_info *except_info, struct pt_regs *regs) die_if_kernel(except_info->kernel_str, regs, addr); force_sig_fault(except_info->signo, except_info->code, - (void __user *)addr, current); + (void __user *)addr); } /* diff --git a/arch/csky/Makefile b/arch/csky/Makefile index f9aab9157c4a..fb1bbbd91954 100644 --- a/arch/csky/Makefile +++ b/arch/csky/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only OBJCOPYFLAGS :=-O binary GZFLAGS :=-9 -KBUILD_DEFCONFIG := defconfig ifdef CONFIG_CPU_HAS_FPU FPUEXT = f diff --git a/arch/csky/abiv1/alignment.c b/arch/csky/abiv1/alignment.c index d789be36eb4f..27ef5b2c43ab 100644 --- a/arch/csky/abiv1/alignment.c +++ b/arch/csky/abiv1/alignment.c @@ -283,7 +283,7 @@ bad_area: do_exit(SIGKILL); } - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); } static struct ctl_table alignment_tbl[4] = { diff --git a/arch/csky/abiv2/fpu.c b/arch/csky/abiv2/fpu.c index e7e11344005a..86d187d4e5af 100644 --- a/arch/csky/abiv2/fpu.c +++ b/arch/csky/abiv2/fpu.c @@ -124,7 +124,7 @@ void fpu_fpe(struct pt_regs *regs) code = FPE_FLTRES; } - force_sig_fault(sig, code, (void __user *)regs->pc, current); + force_sig_fault(sig, code, (void __user *)regs->pc); } #define FMFVR_FPU_REGS(vrx, vry) \ diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index d213bb47b717..98c5716708d6 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -8,6 +8,9 @@ #include <linux/mm.h> #include <linux/sched.h> +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -39,33 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) return pte; } -static inline struct page *pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); - if (!pte) - return NULL; - - if 
(!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_pages((unsigned long)pte, PTE_ORDER); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_pages(pte, PTE_ORDER); -} - static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { free_pages((unsigned long)pgd, PGD_ORDER); diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 04a43cfd4e09..9b1b7c039ddf 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -39,6 +39,11 @@ static int save_fpu_state(struct sigcontext __user *sc) #endif struct rt_sigframe { + /* + * pad[3] is compatible with the same struct defined in + * gcc/libgcc/config/csky/linux-unwind.h + */ + int pad[3]; struct siginfo info; struct ucontext uc; }; @@ -61,7 +66,6 @@ SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; - struct task_struct *task; sigset_t set; /* Always make any pending restarted system calls return -EINTR */ @@ -86,8 +90,7 @@ SYSCALL_DEFINE0(rt_sigreturn) return regs->a0; badframe: - task = current; - force_sig(SIGSEGV, task); + force_sig(SIGSEGV); return 0; } diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index f487a9b996ae..2792e9601ac5 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -106,7 +106,7 @@ void buserr(struct pt_regs *regs) pr_err("User mode Bus Error\n"); show_regs(regs); - force_sig_fault(SIGSEGV, 0, (void __user *)regs->pc, current); + force_sig_fault(SIGSEGV, 0, (void __user *)regs->pc); } #define USR_BKPT 0x1464 diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index 18041f46ded1..f76618b630f9 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -179,7 +179,7 @@ bad_area: bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { - force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -212,5 +212,5 @@ do_sigbus: if (!user_mode(regs)) goto no_context; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index ecfc4b4b6373..ec800e9d5aad 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -2,6 +2,9 @@ config H8300 def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_BINFMT_FLAT + select BINFMT_FLAT_ARGVP_ENVP_ON_STACK + select BINFMT_FLAT_OLD_ALWAYS_RAM select GENERIC_ATOMIC64 select HAVE_UID16 select VIRT_TO_BUS diff --git a/arch/h8300/include/asm/flat.h b/arch/h8300/include/asm/flat.h index f4cdfcbdd2ba..78070f924177 100644 --- a/arch/h8300/include/asm/flat.h +++ b/arch/h8300/include/asm/flat.h @@ -8,11 +8,6 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 1 -#define flat_old_ram_flag(flags) 1 -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -#define flat_set_persistent(relval, p) 0 - /* * on the H8 a couple of the relocations have an instruction in the * top byte. 
As there can only be 24bits of address space, we just @@ -22,7 +17,7 @@ #define flat_get_relocate_addr(rel) (rel & ~0x00000001) static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { u32 val = get_unaligned((__force u32 *)rp); if (!(flags & FLAT_FLAG_GOTPIC)) diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c index f5ff3b794c85..15db45a03b04 100644 --- a/arch/h8300/kernel/ptrace_h.c +++ b/arch/h8300/kernel/ptrace_h.c @@ -250,7 +250,7 @@ asmlinkage void trace_trap(unsigned long bp) { if ((unsigned long)current->thread.breakinfo.addr == bp) { user_disable_single_step(current); - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } else - force_sig(SIGILL, current); + force_sig(SIGILL); } diff --git a/arch/h8300/kernel/ptrace_s.c b/arch/h8300/kernel/ptrace_s.c index c0af930052c0..ee21f37b7ed4 100644 --- a/arch/h8300/kernel/ptrace_s.c +++ b/arch/h8300/kernel/ptrace_s.c @@ -40,5 +40,5 @@ void user_enable_single_step(struct task_struct *child) asmlinkage void trace_trap(unsigned long bp) { (void)bp; - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index e0f2b708e5d9..ef7489b7c459 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -126,7 +126,7 @@ asmlinkage int sys_rt_sigreturn(void) return er0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index 5bc36db26475..d48864c48e5a 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -252,6 +252,6 @@ asmlinkage int sys_rt_sigreturn(void) return regs->r00; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index a01da26dbfe1..69c623b14ddd 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -239,7 +239,7 @@ int die_if_kernel(char *str, struct pt_regs *regs, long err) static void misaligned_instruction(struct pt_regs *regs) { die_if_kernel("Misaligned Instruction", regs, 0); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } /* @@ -250,19 +250,19 @@ static void misaligned_instruction(struct pt_regs *regs) static void misaligned_data_load(struct pt_regs *regs) { die_if_kernel("Misaligned Data Load", regs, 0); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } static void misaligned_data_store(struct pt_regs *regs) { die_if_kernel("Misaligned Data Store", regs, 0); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } static void illegal_instruction(struct pt_regs *regs) { die_if_kernel("Illegal Instruction", regs, 0); - force_sig(SIGILL, current); + force_sig(SIGILL); } /* @@ -272,7 +272,7 @@ static void illegal_instruction(struct pt_regs *regs) static void precise_bus_error(struct pt_regs *regs) { die_if_kernel("Precise Bus Error", regs, 0); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } /* @@ -407,7 +407,7 @@ void do_trap0(struct pt_regs *regs) * may want to use a different trap0 flavor. 
*/ force_sig_fault(SIGTRAP, TRAP_BRKPT, - (void __user *) pt_elr(regs), current); + (void __user *) pt_elr(regs)); } else { #ifdef CONFIG_KGDB kgdb_handle_exception(pt_cause(regs), SIGTRAP, diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index b7a99aa5b0ba..b3bc71680ae4 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -135,14 +135,14 @@ good_area: si_signo = SIGSEGV; si_code = SEGV_ACCERR; } - force_sig_fault(si_signo, si_code, (void __user *)address, current); + force_sig_fault(si_signo, si_code, (void __user *)address); return; bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { - force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } /* Kernel-mode fault falls through */ diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 7aeb48a18576..1a338e541334 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -324,8 +324,6 @@ static int rs_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } -#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) - /* * This routine will shutdown a serial port; interrupts are disabled, and * DTR is dropped if the hangup on close termio flag is on. diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 206530d0751b..50440f3ddc43 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -124,10 +124,10 @@ ATOMIC_FETCH_OP(xor, ^) #undef ATOMIC_OP #define ATOMIC64_OP(op, c_op) \ -static __inline__ long \ -ia64_atomic64_##op (__s64 i, atomic64_t *v) \ +static __inline__ s64 \ +ia64_atomic64_##op (s64 i, atomic64_t *v) \ { \ - __s64 old, new; \ + s64 old, new; \ CMPXCHG_BUGCHECK_DECL \ \ do { \ @@ -139,10 +139,10 @@ ia64_atomic64_##op (__s64 i, atomic64_t *v) \ } #define ATOMIC64_FETCH_OP(op, c_op) \ -static __inline__ long \ -ia64_atomic64_fetch_##op (__s64 i, atomic64_t *v) \ +static __inline__ s64 \ +ia64_atomic64_fetch_##op (s64 i, atomic64_t *v) \ { \ - __s64 old, new; \ + s64 old, new; \ CMPXCHG_BUGCHECK_DECL \ \ do { \ @@ -162,7 +162,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_add_return(i,v) \ ({ \ - long __ia64_aar_i = (i); \ + s64 __ia64_aar_i = (i); \ __ia64_atomic_const(i) \ ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ : ia64_atomic64_add(__ia64_aar_i, v); \ @@ -170,7 +170,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_sub_return(i,v) \ ({ \ - long __ia64_asr_i = (i); \ + s64 __ia64_asr_i = (i); \ __ia64_atomic_const(i) \ ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ : ia64_atomic64_sub(__ia64_asr_i, v); \ @@ -178,7 +178,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_fetch_add(i,v) \ ({ \ - long __ia64_aar_i = (i); \ + s64 __ia64_aar_i = (i); \ __ia64_atomic_const(i) \ ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ @@ -186,7 +186,7 @@ ATOMIC64_OPS(sub, -) #define atomic64_fetch_sub(i,v) \ ({ \ - long __ia64_asr_i = (i); \ + s64 __ia64_asr_i = (i); \ __ia64_atomic_const(i) \ ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c index c0239bf77a09..782c481d7052 100644 --- a/arch/ia64/kernel/brl_emu.c +++ b/arch/ia64/kernel/brl_emu.c @@ -197,21 +197,21 @@ ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec) */ printk(KERN_DEBUG "Woah! 
Unimplemented Instruction Address Trap!\n"); force_sig_fault(SIGILL, ILL_BADIADDR, (void __user *)NULL, - 0, 0, 0, current); + 0, 0, 0); } else if (ia64_psr(regs)->tb) { /* * Branch Tracing is enabled. * Force a taken branch signal. */ force_sig_fault(SIGTRAP, TRAP_BRANCH, (void __user *)NULL, - 0, 0, 0, current); + 0, 0, 0); } else if (ia64_psr(regs)->ss) { /* * Single Step is enabled. * Force a trace signal. */ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)NULL, - 0, 0, 0, current); + 0, 0, 0); } return rv; } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6a52d761854b..79190d877fa7 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1831,7 +1831,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset, ti->cpu = cpu; p->stack = ti; p->state = TASK_UNINTERRUPTIBLE; - cpumask_set_cpu(cpu, &p->cpus_allowed); + cpumask_set_cpu(cpu, &p->cpus_mask); INIT_LIST_HEAD(&p->tasks); p->parent = p->real_parent = p->group_leader = p; INIT_LIST_HEAD(&p->children); diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 58a6337c0690..7c52bd2695a2 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6390,11 +6390,7 @@ pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) } /* save the current system wide pmu states */ - ret = on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - goto cleanup_reserve; - } + on_each_cpu(pfm_alt_save_pmu_state, NULL, 1); /* officially change to the alternate interrupt handler */ pfm_alt_intr_handler = hdl; @@ -6421,7 +6417,6 @@ int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) { int i; - int ret; if (hdl == NULL) return -EINVAL; @@ -6435,10 +6430,7 @@ pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) pfm_alt_intr_handler = NULL; - ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); - if (ret) { - DPRINT(("on_each_cpu() failed: %d\n", ret)); - } + on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1); for_each_online_cpu(i) { pfm_unreserve_session(NULL, 1, i); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 6062fd14e34e..e5044aed9452 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -152,7 +152,7 @@ ia64_rt_sigreturn (struct sigscratch *scr) return retval; give_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return retval; } @@ -257,7 +257,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr) */ check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN; if (!likely(on_sig_stack(check_sp))) { - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return 1; } } @@ -265,7 +265,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr) frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN); if (!access_ok(frame, sizeof(*frame))) { - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return 1; } @@ -282,7 +282,7 @@ setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr) err |= setup_sigcontext(&frame->sc, set, scr); if (unlikely(err)) { - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return 1; } diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index e01df3f2f80d..ecc44926737b 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -354,3 +354,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open 
sys_pidfd_open diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 85d8616ac4f6..e13cb905930f 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -176,7 +176,7 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) } force_sig_fault(sig, code, (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), - break_num, 0 /* clear __ISR_VALID */, 0, current); + break_num, 0 /* clear __ISR_VALID */, 0); } /* @@ -353,7 +353,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) } force_sig_fault(SIGFPE, si_code, (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); } } else { if (exception == -1) { @@ -373,7 +373,7 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) } force_sig_fault(SIGFPE, si_code, (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); } } return 0; @@ -408,7 +408,7 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3, force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *) (regs.cr_iip + ia64_psr(®s)->ri), - 0, 0, 0, current); + 0, 0, 0); return rv; } @@ -483,7 +483,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + ia64_psr(®s)->ri); } force_sig_fault(sig, code, addr, - vector, __ISR_VALID, isr, current); + vector, __ISR_VALID, isr); return; } else if (ia64_done_with_exception(®s)) return; @@ -493,7 +493,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 31: /* Unsupported Data Reference */ if (user_mode(®s)) { force_sig_fault(SIGILL, ILL_ILLOPN, (void __user *) iip, - vector, __ISR_VALID, isr, current); + vector, __ISR_VALID, isr); return; } sprintf(buf, "Unsupported data reference"); @@ -542,7 +542,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, == NOTIFY_STOP) return; force_sig_fault(SIGTRAP, si_code, (void __user *) ifa, - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); return; case 32: /* fp fault */ @@ -550,7 +550,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, result = handle_fpu_swa((vector == 32) ? 
1 : 0, ®s, isr); if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { force_sig_fault(SIGFPE, FPE_FLTINV, (void __user *) iip, - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); } return; @@ -578,7 +578,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, if (user_mode(®s)) { force_sig_fault(SIGILL, ILL_BADIADDR, (void __user *) iip, - 0, 0, 0, current); + 0, 0, 0); return; } sprintf(buf, "Unimplemented Instruction Address fault"); @@ -589,14 +589,14 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", iip, ifa, isr); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; case 46: printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", iip, ifa, isr, iim); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; case 47: @@ -608,5 +608,5 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, break; } if (!die_if_kernel(buf, ®s, error)) - force_sig(SIGILL, current); + force_sig(SIGILL); } diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index a167a3824b35..eb7d5df59fa3 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -1537,6 +1537,6 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) } force_sigbus: force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) ifa, - 0, 0, 0, current); + 0, 0, 0); goto done; } diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index edcdfc149311..16c6d377c502 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -121,8 +121,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) { atomic_set(&uc_pool->status, 0); - status = smp_call_function(uncached_ipi_visibility, uc_pool, 1); - if (status || atomic_read(&uc_pool->status)) + smp_call_function(uncached_ipi_visibility, uc_pool, 1); + if (atomic_read(&uc_pool->status)) goto failed; } else if (status != PAL_VISIBILITY_OK) goto failed; @@ -143,8 +143,8 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) if (status != PAL_STATUS_SUCCESS) goto failed; atomic_set(&uc_pool->status, 0); - status = smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); - if (status || atomic_read(&uc_pool->status)) + smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); + if (atomic_read(&uc_pool->status)) goto failed; /* diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 5baeb022f474..3c3a283d3172 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -249,7 +249,7 @@ retry: } if (user_mode(regs)) { force_sig_fault(signal, code, (void __user *) address, - 0, __ISR_VALID, isr, current); + 0, __ISR_VALID, isr); return; } diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 218e037ef901..c518d695c376 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -3,10 +3,15 @@ config M68K bool default y select ARCH_32BIT_OFF_T + select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE + select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA select ARCH_MIGHT_HAVE_PC_PARPORT if ISA select ARCH_NO_COHERENT_DMA_MMAP if !MMU select ARCH_NO_PREEMPT if !COLDFIRE + select 
BINFMT_FLAT_ARGVP_ENVP_ON_STACK + select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE select HAVE_IDE select HAVE_AOUT if MMU select HAVE_DEBUG_BUGVERBOSE diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index fea392cfcf1b..04e0f211afb3 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -71,9 +71,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -205,7 +202,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -231,7 +227,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -308,7 +303,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -436,6 +430,8 @@ CONFIG_FB_AMIGA_OCS=y CONFIG_FB_AMIGA_ECS=y CONFIG_FB_AMIGA_AGA=y CONFIG_FB_FM2=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -553,13 +549,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -583,7 +580,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -626,6 +622,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 2474d267460e..c6abbb535878 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -67,9 +67,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -201,7 +198,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -227,7 +223,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -304,7 +299,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -397,6 +391,8 @@ CONFIG_PPS_CLIENT_LDISC=m 
CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set @@ -513,13 +509,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -543,7 +540,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -586,6 +582,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 0fc7d2992fe0..06ae65bad177 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -74,9 +74,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -208,7 +205,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -234,7 +230,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -311,7 +306,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -421,6 +415,8 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FB_ATARI=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -535,13 +531,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -565,7 +562,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -608,6 +604,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 699df9fdf866..5616b94053b6 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -64,9 +64,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m 
CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -198,7 +195,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -224,7 +220,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -301,7 +296,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -394,6 +388,8 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -506,13 +502,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -536,7 +533,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -579,6 +575,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index b50802255324..1106521f3b56 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -66,9 +66,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -200,7 +197,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -226,7 +222,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -303,7 +298,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -399,6 +393,8 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set @@ -515,13 +511,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y 
CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -545,7 +542,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -588,6 +584,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 04e7d70f6030..226c6c063cd4 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -65,9 +65,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -199,7 +196,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -225,7 +221,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -305,7 +300,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -423,6 +417,8 @@ CONFIG_PTP_1588_CLOCK=m CONFIG_FB=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -537,13 +533,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -567,7 +564,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -610,6 +606,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 5e1cc4c17852..39f603417928 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -85,9 +85,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -219,7 +216,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -245,7 +241,6 @@ 
CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -325,7 +320,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -499,6 +493,8 @@ CONFIG_FB_FM2=y CONFIG_FB_ATARI=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -619,13 +615,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -649,7 +646,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -692,6 +688,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 170ac8792c2d..175a607f576c 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -63,9 +63,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -197,7 +194,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -223,7 +219,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -300,7 +295,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -393,6 +387,8 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -505,13 +501,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -535,7 +532,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -578,6 +574,7 @@ CONFIG_ATOMIC64_SELFTEST=m 
CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index d865592a423e..f41c34d3cdd0 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -64,9 +64,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -198,7 +195,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -224,7 +220,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -301,7 +296,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -394,6 +388,8 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -506,13 +502,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -536,7 +533,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -579,6 +575,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 034a9de90484..c9d2cb0a1cf4 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -65,9 +65,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -199,7 +196,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -225,7 +221,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -302,7 +297,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -408,6 +402,8 @@ CONFIG_PPS_CLIENT_PARPORT=m 
CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m @@ -524,13 +520,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -554,7 +551,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -597,6 +593,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 49be0f9fcd8d..79a64fdd6bf0 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -61,9 +61,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -195,7 +192,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -221,7 +217,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -298,7 +293,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -394,6 +388,8 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -508,13 +504,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -538,7 +535,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -581,6 +577,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index a71acf4a6004..e3402a5d165b 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -61,9 +61,6 @@ CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_ESP_OFFLOAD=m 
CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m CONFIG_INET_RAW_DIAG=m @@ -195,7 +192,6 @@ CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m -CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y @@ -221,7 +217,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m CONFIG_NF_FLOW_TABLE_IPV6=m @@ -298,7 +293,6 @@ CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m CONFIG_NET_IFE=m -# CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_TEST_ASYNC_DRIVER_PROBE=m @@ -393,6 +387,8 @@ CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y +# CONFIG_LCD_CLASS_DEVICE is not set +# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID=m @@ -507,13 +503,14 @@ CONFIG_NLS_MAC_TURKISH=m CONFIG_DLM=m CONFIG_ENCRYPTED_KEYS=m CONFIG_HARDENED_USERCOPY=y -CONFIG_CRYPTO_RSA=m -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_RSA=m +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_ECRDSA=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_AEGIS128=m CONFIG_CRYPTO_AEGIS128L=m @@ -537,7 +534,6 @@ CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3=m -CONFIG_CRYPTO_STREEBOG=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -580,6 +576,7 @@ CONFIG_ATOMIC64_SELFTEST=m CONFIG_ASYNC_RAID6_TEST=m CONFIG_TEST_HEXDUMP=m CONFIG_TEST_STRING_HELPERS=m +CONFIG_TEST_STRSCPY=m CONFIG_TEST_KSTRTOX=m CONFIG_TEST_PRINTF=m CONFIG_TEST_BITMAP=m diff --git a/arch/m68k/include/asm/flat.h b/arch/m68k/include/asm/flat.h index 4f1d1e373420..46379e08cdd6 100644 --- a/arch/m68k/include/asm/flat.h +++ b/arch/m68k/include/asm/flat.h @@ -6,35 +6,7 @@ #ifndef __M68KNOMMU_FLAT_H__ #define __M68KNOMMU_FLAT_H__ -#include <linux/uaccess.h> - -#define flat_argvp_envp_on_stack() 1 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) -{ -#ifdef CONFIG_CPU_HAS_NO_UNALIGNED - return copy_from_user(addr, rp, 4) ? -EFAULT : 0; -#else - return get_user(*addr, rp); -#endif -} - -static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) -{ -#ifdef CONFIG_CPU_HAS_NO_UNALIGNED - return copy_to_user(rp, &addr, 4) ? 
-EFAULT : 0; -#else - return put_user(addr, rp); -#endif -} -#define flat_get_relocate_addr(rel) (rel) - -static inline int flat_set_persistent(u32 relval, u32 *persistent) -{ - return 0; -} +#include <asm-generic/flat.h> #define FLAT_PLAT_INIT(regs) \ do { \ diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 1456c5eecbd9..1a8ddbd0d23c 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -13,55 +13,18 @@ #include <asm/tlb.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + extern const char bad_pmd_string[]; #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long) pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t page) -{ - pgtable_page_dtor(page); - __free_page(page); -} - #define __pte_free_tlb(tlb,pte,addr) \ do { \ pgtable_page_dtor(pte); \ tlb_remove_page((tlb), pte); \ } while (0) -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - unsigned long page = __get_free_page(GFP_KERNEL); - - if (!page) - return NULL; - - memset((void *)page, 0, PAGE_SIZE); - return (pte_t *) (page); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *page = alloc_pages(GFP_KERNEL, 0); - - if (page == NULL) - return NULL; - - clear_highpage(page); - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; - -} - static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { pmd_val(*pmd) = __pa((unsigned long)pte); diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index b4aa853051bd..30cd59caf037 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -18,57 +18,22 @@ #include <asm/pgalloc.h> #if defined(CONFIG_MMU) && !defined(CONFIG_COLDFIRE) - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t flag, unsigned long attrs) +void arch_dma_prep_coherent(struct page *page, size_t size) { - struct page *page, **map; - pgprot_t pgprot; - void *addr; - int i, order; - - pr_debug("dma_alloc_coherent: %d,%x\n", size, flag); - - size = PAGE_ALIGN(size); - order = get_order(size); - - page = alloc_pages(flag | __GFP_ZERO, order); - if (!page) - return NULL; - - *handle = page_to_phys(page); - map = kmalloc(sizeof(struct page *) << order, flag & ~__GFP_DMA); - if (!map) { - __free_pages(page, order); - return NULL; - } - split_page(page, order); - - order = 1 << order; - size >>= PAGE_SHIFT; - map[0] = page; - for (i = 1; i < size; i++) - map[i] = page + i; - for (; i < order; i++) - __free_page(page + i); - pgprot = __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY); - if (CPU_IS_040_OR_060) - pgprot_val(pgprot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S; - else - pgprot_val(pgprot) |= _PAGE_NOCACHE030; - addr = vmap(map, size, VM_MAP, pgprot); - kfree(map); - - return addr; + cache_push(page_to_phys(page), size); } -void arch_dma_free(struct device *dev, size_t size, void *addr, - dma_addr_t handle, unsigned long attrs) +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) { - pr_debug("dma_free_coherent: %p, %x\n", addr, handle); - vfree(addr); + if (CPU_IS_040_OR_060) { + pgprot_val(prot) &= ~_PAGE_CACHE040; + pgprot_val(prot) |= _PAGE_GLOBAL040 | _PAGE_NOCACHE_S; + } else { + pgprot_val(prot) |= _PAGE_NOCACHE030; + } + return prot; } - #else #include <asm/cacheflush.h> diff --git 
a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 87e7f3639839..05610e6924c1 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -803,7 +803,7 @@ asmlinkage int do_sigreturn(struct pt_regs *regs, struct switch_stack *sw) return regs->d0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -825,7 +825,7 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs, struct switch_stack *sw) return regs->d0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 7e3d0734b2f3..9a3eb2558568 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -433,3 +433,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index b2fd000b9285..344f93d36a9a 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -431,7 +431,7 @@ static inline void bus_error030 (struct frame *fp) pr_err("BAD KERNEL BUSERR\n"); die_if_kernel("Oops", &fp->ptregs,0); - force_sig(SIGKILL, current); + force_sig(SIGKILL); return; } } else { @@ -463,7 +463,7 @@ static inline void bus_error030 (struct frame *fp) !(ssw & RW) ? "write" : "read", addr, fp->ptregs.pc); die_if_kernel ("Oops", &fp->ptregs, buserr_type); - force_sig (SIGBUS, current); + force_sig (SIGBUS); return; } @@ -493,7 +493,7 @@ static inline void bus_error030 (struct frame *fp) do_page_fault (&fp->ptregs, addr, 0); } else { pr_debug("protection fault on insn access (segv).\n"); - force_sig (SIGSEGV, current); + force_sig (SIGSEGV); } } #else @@ -571,7 +571,7 @@ static inline void bus_error030 (struct frame *fp) !(ssw & RW) ? 
"write" : "read", addr, fp->ptregs.pc); die_if_kernel("Oops",&fp->ptregs,mmusr); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } else { #if 0 @@ -598,7 +598,7 @@ static inline void bus_error030 (struct frame *fp) #endif pr_debug("Unknown SIGSEGV - 1\n"); die_if_kernel("Oops",&fp->ptregs,mmusr); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } @@ -621,7 +621,7 @@ static inline void bus_error030 (struct frame *fp) buserr: pr_err("BAD KERNEL BUSERR\n"); die_if_kernel("Oops",&fp->ptregs,0); - force_sig(SIGKILL, current); + force_sig(SIGKILL); return; } @@ -660,7 +660,7 @@ static inline void bus_error030 (struct frame *fp) addr, fp->ptregs.pc); pr_debug("Unknown SIGSEGV - 2\n"); die_if_kernel("Oops",&fp->ptregs,mmusr); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } @@ -804,7 +804,7 @@ asmlinkage void buserr_c(struct frame *fp) default: die_if_kernel("bad frame format",&fp->ptregs,0); pr_debug("Unknown SIGSEGV - 4\n"); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } } @@ -1127,7 +1127,7 @@ asmlinkage void trap_c(struct frame *fp) addr = (void __user*) fp->un.fmtb.daddr; break; } - force_sig_fault(sig, si_code, addr, current); + force_sig_fault(sig, si_code, addr); } void die_if_kernel (char *str, struct pt_regs *fp, int nr) @@ -1159,6 +1159,6 @@ asmlinkage void fpsp040_die(void) #ifdef CONFIG_M68KFPU_EMU asmlinkage void fpemu_signal(int signal, int code, void *addr) { - force_sig_fault(signal, code, addr, current); + force_sig_fault(signal, code, addr); } #endif diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 11be08f4f750..205ac75da13d 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -911,6 +911,10 @@ static const struct resource mac_scsi_iifx_rsrc[] __initconst = { .flags = IORESOURCE_MEM, .start = 0x50008000, .end = 0x50009FFF, + }, { + .flags = IORESOURCE_MEM, + .start = 0x50008000, + .end = 0x50009FFF, }, }; @@ -1012,10 +1016,12 @@ int __init mac_platform_init(void) case MAC_SCSI_IIFX: /* Addresses from The Guide to Mac Family Hardware. * $5000 8000 - $5000 9FFF: SCSI DMA + * $5000 A000 - $5000 BFFF: Alternate SCSI * $5000 C000 - $5000 DFFF: Alternate SCSI (DMA) * $5000 E000 - $5000 FFFF: Alternate SCSI (Hsk) - * The SCSI DMA custom IC embeds the 53C80 core. mac_scsi does - * not make use of its DMA or hardware handshaking logic. + * The A/UX header file sys/uconfig.h says $50F0 8000. + * The "SCSI DMA" custom IC embeds the 53C80 core and + * supports Programmed IO, DMA and PDMA (hardware handshake). */ platform_device_register_simple("mac_scsi", 0, mac_scsi_iifx_rsrc, ARRAY_SIZE(mac_scsi_iifx_rsrc)); diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 9b6163c05a75..e9b1d7585b43 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -30,13 +30,13 @@ int send_fault_sig(struct pt_regs *regs) pr_debug("send_fault_sig: %p,%d,%d\n", addr, signo, si_code); if (user_mode(regs)) { - force_sig_fault(signo, si_code, addr, current); + force_sig_fault(signo, si_code, addr); } else { if (fixup_exception(regs)) return -1; //if (signo == SIGBUS) - // force_sig_fault(si_signo, si_code, addr, current); + // force_sig_fault(si_signo, si_code, addr); /* * Oops. The kernel tried to access some bad page. 
We'll have to diff --git a/arch/m68k/q40/README b/arch/m68k/q40/README index 93f4c4cd3c45..a4991d2d8af6 100644 --- a/arch/m68k/q40/README +++ b/arch/m68k/q40/README @@ -31,7 +31,7 @@ drivers used by the Q40, apart from the very obvious (console etc.): char/joystick/* # most of this should work, not # in default config.in block/q40ide.c # startup for ide - ide* # see Documentation/ide/ide.txt + ide* # see Documentation/ide/ide.rst floppy.c # normal PC driver, DMA emu in asm/floppy.h # and arch/m68k/kernel/entry.S # see drivers/block/README.fd diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index f11433daab4a..d411de05b628 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -3,6 +3,7 @@ config MICROBLAZE def_bool y select ARCH_32BIT_OFF_T select ARCH_NO_SWAP + select ARCH_HAS_BINFMT_FLAT if !MMU select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SYNC_DMA_FOR_CPU diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug index 3a343188d86c..865527ac332a 100644 --- a/arch/microblaze/Kconfig.debug +++ b/arch/microblaze/Kconfig.debug @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. config TRACE_IRQFLAGS_SUPPORT def_bool y diff --git a/arch/microblaze/Kconfig.platform b/arch/microblaze/Kconfig.platform index 5bf54c1d4f60..7795f90dad86 100644 --- a/arch/microblaze/Kconfig.platform +++ b/arch/microblaze/Kconfig.platform @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # # Platform selection Kconfig menu for MicroBlaze targets # diff --git a/arch/microblaze/include/asm/flat.h b/arch/microblaze/include/asm/flat.h index 3d2747d4c967..1ab86770eaee 100644 --- a/arch/microblaze/include/asm/flat.h +++ b/arch/microblaze/include/asm/flat.h @@ -13,11 +13,6 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 0 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -#define flat_set_persistent(relval, p) 0 - /* * Microblaze works a little differently from other arches, because * of the MICROBLAZE_64 reloc type. 
Here, a 32 bit address is split @@ -33,7 +28,7 @@ */ static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { u32 *p = (__force u32 *)rp; diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index eafff21fcb0e..cf99c411503e 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -63,7 +63,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) if (kernel_mode(regs)) die("Exception in kernel mode", regs, signr); - force_sig_fault(signr, code, (void __user *)addr, current); + force_sig_fault(signr, code, (void __user *)addr); } asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 0685696349bb..cdd4feb279c5 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -108,7 +108,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return rval; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 26339e417695..09b0cd7dab0a 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -439,3 +439,5 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 202ad6a494f5..e6a810b0c7ad 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -289,7 +289,7 @@ out_of_memory: do_sigbus: up_read(&mm->mmap_sem); if (user_mode(regs)) { - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); return; } bad_page_fault(regs, address, SIGBUS); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 70d3200476bf..d50fafd7bf3a 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -34,6 +34,7 @@ config MIPS select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT select HANDLE_DOMAIN_IRQ select HAVE_ARCH_COMPILER_H select HAVE_ARCH_JUMP_LABEL @@ -52,6 +53,7 @@ config MIPS select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EXIT_THREAD + select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER @@ -1119,6 +1121,7 @@ config DMA_NONCOHERENT bool select ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_UNCACHED_SEGMENT select NEED_DMA_MAP_STATE select ARCH_HAS_DMA_COHERENT_TO_PFN select DMA_NONCOHERENT_CACHE_SYNC diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 8f4486c4415b..eceff9b75b22 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -17,6 +17,7 @@ archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs KBUILD_DEFCONFIG := 32r2el_defconfig +KBUILD_DTBS := dtbs # # Select the object file format to substitute into the linker script. 
@@ -384,7 +385,7 @@ quiet_cmd_64 = OBJCOPY $@ vmlinux.64: vmlinux $(call cmd,64) -all: $(all-y) +all: $(all-y) $(KBUILD_DTBS) # boot $(boot-y): $(vmlinux-32) FORCE diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 3c453a1f1ff1..172801ed35b8 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -78,6 +78,8 @@ OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \ $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE $(call if_changed,objcopy) +HOSTCFLAGS_calc_vmlinuz_load_addr.o += $(LINUXINCLUDE) + # Calculate the load address of the compressed kernel image hostprogs-y := calc_vmlinuz_load_addr diff --git a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c index 240f1d12df75..080b926d2623 100644 --- a/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c +++ b/arch/mips/boot/compressed/calc_vmlinuz_load_addr.c @@ -9,7 +9,7 @@ #include <stdint.h> #include <stdio.h> #include <stdlib.h> -#include "../../../../include/linux/sizes.h" +#include <linux/sizes.h> int main(int argc, char *argv[]) { diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi index 90c60d42f571..33ae74aaa1bb 100644 --- a/arch/mips/boot/dts/mscc/ocelot.dtsi +++ b/arch/mips/boot/dts/mscc/ocelot.dtsi @@ -132,11 +132,12 @@ <0x1270000 0x100>, <0x1280000 0x100>, <0x1800000 0x80000>, - <0x1880000 0x10000>; + <0x1880000 0x10000>, + <0x1060000 0x10000>; reg-names = "sys", "rew", "qs", "port0", "port1", "port2", "port3", "port4", "port5", "port6", "port7", "port8", "port9", "port10", "qsys", - "ana"; + "ana", "s2"; interrupts = <21 22>; interrupt-names = "xtr", "inj"; diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 2bae201aa365..63a9f33aa43e 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -116,6 +116,32 @@ }; }; + eth0: ethernet@19000000 { + compatible = "qca,ar9330-eth"; + reg = <0x19000000 0x200>; + interrupts = <4>; + + resets = <&rst 9>, <&rst 22>; + reset-names = "mac", "mdio"; + clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; + clock-names = "eth", "mdio"; + + status = "disabled"; + }; + + eth1: ethernet@1a000000 { + compatible = "qca,ar9330-eth"; + reg = <0x1a000000 0x200>; + interrupts = <5>; + + resets = <&rst 13>, <&rst 23>; + reset-names = "mac", "mdio"; + clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>; + clock-names = "eth", "mdio"; + + status = "disabled"; + }; + usb: usb@1b000100 { compatible = "chipidea,usb2"; reg = <0x1b000000 0x200>; diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts index e7af2cf5f4c1..77bab823eb3b 100644 --- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts +++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts @@ -76,3 +76,11 @@ reg = <0>; }; }; + +&eth0 { + status = "okay"; +}; + +&eth1 { + status = "okay"; +}; diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 0ee5e677662e..0de92ac1ca64 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -210,7 +210,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 041bffac043b..efc3abace048 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ 
b/arch/mips/configs/malta_kvm_defconfig @@ -215,7 +215,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y diff --git a/arch/mips/configs/malta_kvm_guest_defconfig b/arch/mips/configs/malta_kvm_guest_defconfig index 511065e62182..c6ceeca4394d 100644 --- a/arch/mips/configs/malta_kvm_guest_defconfig +++ b/arch/mips/configs/malta_kvm_guest_defconfig @@ -212,7 +212,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index 299088043164..e6c600dc1814 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -74,7 +74,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index 2b4b3a24f637..82b44b774553 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -76,7 +76,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index 425ddfd7cd78..4190fc6189a0 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -77,7 +77,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index 8beaa7ba1e52..a13c10e910ec 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -78,7 +78,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index 6e8b95ceb54a..b35f1fc690fb 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -75,7 +75,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=y -CONFIG_NET_CLS_IND=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index 6c026db96ff9..56861aef2756 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -212,7 +212,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_MESH=y diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig index 50632a3103dd..864c70fbe668 100644 --- a/arch/mips/configs/rb532_defconfig +++ b/arch/mips/configs/rb532_defconfig @@ -103,7 +103,6 @@ CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_CLS_IND=y 
CONFIG_HAMRADIO=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 94096299fc56..9a82dd11c0e9 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -254,10 +254,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) #define ATOMIC64_OP(op, c_op, asm_op) \ -static __inline__ void atomic64_##op(long i, atomic64_t * v) \ +static __inline__ void atomic64_##op(s64 i, atomic64_t * v) \ { \ if (kernel_uses_llsc) { \ - long temp; \ + s64 temp; \ \ loongson_llsc_mb(); \ __asm__ __volatile__( \ @@ -280,12 +280,12 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \ } #define ATOMIC64_OP_RETURN(op, c_op, asm_op) \ -static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ +static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v) \ { \ - long result; \ + s64 result; \ \ if (kernel_uses_llsc) { \ - long temp; \ + s64 temp; \ \ loongson_llsc_mb(); \ __asm__ __volatile__( \ @@ -314,12 +314,12 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ } #define ATOMIC64_FETCH_OP(op, c_op, asm_op) \ -static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v) \ +static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v) \ { \ - long result; \ + s64 result; \ \ if (kernel_uses_llsc) { \ - long temp; \ + s64 temp; \ \ loongson_llsc_mb(); \ __asm__ __volatile__( \ @@ -386,14 +386,14 @@ ATOMIC64_OPS(xor, ^=, xor) * Atomically test @v and subtract @i if @v is greater or equal than @i. * The function returns the old value of @v minus @i. */ -static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) +static __inline__ s64 atomic64_sub_if_positive(s64 i, atomic64_t * v) { - long result; + s64 result; smp_mb__before_llsc(); if (kernel_uses_llsc) { - long temp; + s64 temp; __asm__ __volatile__( " .set push \n" diff --git a/arch/mips/include/asm/mach-ath79/ar933x_uart.h b/arch/mips/include/asm/mach-ath79/ar933x_uart.h index b8f8af7dc47c..cacf3545e018 100644 --- a/arch/mips/include/asm/mach-ath79/ar933x_uart.h +++ b/arch/mips/include/asm/mach-ath79/ar933x_uart.h @@ -24,8 +24,8 @@ #define AR933X_UART_CS_PARITY_S 0 #define AR933X_UART_CS_PARITY_M 0x3 #define AR933X_UART_CS_PARITY_NONE 0 -#define AR933X_UART_CS_PARITY_ODD 1 -#define AR933X_UART_CS_PARITY_EVEN 2 +#define AR933X_UART_CS_PARITY_ODD 2 +#define AR933X_UART_CS_PARITY_EVEN 3 #define AR933X_UART_CS_IF_MODE_S 2 #define AR933X_UART_CS_IF_MODE_M 0x3 #define AR933X_UART_CS_IF_MODE_NONE 0 diff --git a/arch/mips/include/asm/mips-gic.h b/arch/mips/include/asm/mips-gic.h index 75a1cdee1331..084cac1c5ea2 100644 --- a/arch/mips/include/asm/mips-gic.h +++ b/arch/mips/include/asm/mips-gic.h @@ -311,6 +311,36 @@ static inline bool mips_gic_present(void) } /** + * mips_gic_vx_map_reg() - Return GIC_Vx_<intr>_MAP register offset + * @intr: A GIC local interrupt + * + * Determine the index of the GIC_VL_<intr>_MAP or GIC_VO_<intr>_MAP register + * within the block of GIC map registers. This is almost the same as the order + * of interrupts in the pending & mask registers, as used by enum + * mips_gic_local_interrupt, but moves the FDC interrupt & thus offsets the + * interrupts after it... + * + * Return: The map register index corresponding to @intr. + * + * The return value is suitable for use with the (read|write)_gic_v[lo]_map + * accessor functions. 
+ */ +static inline unsigned int +mips_gic_vx_map_reg(enum mips_gic_local_interrupt intr) +{ + /* WD, Compare & Timer are 1:1 */ + if (intr <= GIC_LOCAL_INT_TIMER) + return intr; + + /* FDC moves to after Timer... */ + if (intr == GIC_LOCAL_INT_FDC) + return GIC_LOCAL_INT_TIMER + 1; + + /* As a result everything else is offset by 1 */ + return intr + 1; +} + +/** * gic_get_c0_compare_int() - Return cp0 count/compare interrupt virq * * Determine the virq number to use for the coprocessor 0 count/compare diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index a25643d258cb..0ba4ce6e2bf3 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -258,9 +258,6 @@ extern bool __virt_addr_valid(const volatile void *kaddr); ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define UNCAC_ADDR(addr) (UNCAC_BASE + __pa(addr)) -#define CAC_ADDR(addr) ((unsigned long)__va((addr) - UNCAC_BASE)) - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 27808d9461f4..aa16b85ddffc 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -13,6 +13,8 @@ #include <linux/mm.h> #include <linux/sched.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -50,37 +52,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_pages((unsigned long)pgd, PGD_ORDER); } -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER); -} - -static inline struct page *pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_pages(GFP_KERNEL, PTE_ORDER); - if (!pte) - return NULL; - clear_highpage(pte); - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_pages((unsigned long)pte, PTE_ORDER); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_pages(pte, PTE_ORDER); -} - #define __pte_free_tlb(tlb,pte,address) \ do { \ pgtable_page_dtor(pte); \ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 4ccb465ef3f2..7d27194e3b45 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -20,6 +20,7 @@ #include <asm/cmpxchg.h> #include <asm/io.h> #include <asm/pgtable-bits.h> +#include <asm/cpu-features.h> struct mm_struct; struct vm_area_struct; @@ -626,6 +627,8 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#define gup_fast_permitted(start, end) (!cpu_has_dc_aliases) + #include <asm-generic/pgtable.h> /* diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index b6578611dddb..1e76774b36dd 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -56,11 +56,6 @@ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) return regs->regs[31]; } -/* - * Don't use asm-generic/ptrace.h it defines FP accessors that don't make - * sense on MIPS. We rather want an error if they get invoked. 
- */ - static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 0f813bb753c6..09cbe9042828 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -42,7 +42,7 @@ extern struct task_struct *ll_task; * inline to try to keep the overhead down. If we have been forced to run on * a "CPU" with an FPU because of a previous high level of FP computation, * but did not actually use the FPU during the most recent time-slice (CU1 - * isn't set), we undo the restriction on cpus_allowed. + * isn't set), we undo the restriction on cpus_mask. * * We're not calling set_cpus_allowed() here, because we have no need to * force prompt migration - we're already switching the current CPU to a @@ -57,7 +57,7 @@ do { \ test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \ (!(KSTK_STATUS(prev) & ST0_CU1))) { \ clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \ - prev->cpus_allowed = prev->thread.user_cpus_allowed; \ + prev->cpus_mask = prev->thread.user_cpus_allowed; \ } \ next->thread.emulated_fp = 0; \ } while(0) diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index d41765cfbc6e..d0a9ed2ca2d6 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -133,6 +133,8 @@ #define SO_RCVTIMEO_NEW 66 #define SO_SNDTIMEO_NEW 67 +#define SO_DETACH_REUSEPORT_BPF 68 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index bedb5047aff3..1804dc9d8136 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -575,10 +575,6 @@ static void *jazz_dma_alloc(struct device *dev, size_t size, return NULL; } - if (!(attrs & DMA_ATTR_NON_CONSISTENT)) { - dma_cache_wback_inv((unsigned long)ret, size); - ret = (void *)UNCAC_ADDR(ret); - } return ret; } @@ -586,8 +582,6 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { vdma_free(dma_handle); - if (!(attrs & DMA_ATTR_NON_CONSISTENT)) - vaddr = (void *)CAC_ADDR((unsigned long)vaddr); dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs); } diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 180ad081afcf..1db29957a931 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -32,7 +32,7 @@ int __isa_exception_epc(struct pt_regs *regs) /* Calculate exception PC in branch delay slot. */ if (__get_user(inst, (u16 __user *) msk_isa16_mode(epc))) { /* This should never happen because delay slot was checked. */ - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return epc; } if (cpu_has_mips16) { @@ -305,7 +305,7 @@ int __microMIPS_compute_return_epc(struct pt_regs *regs) return 0; sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return -EFAULT; } @@ -328,7 +328,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs) /* Read the instruction. 
*/ addr = (u16 __user *)msk_isa16_mode(epc); if (__get_user(inst.full, addr)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return -EFAULT; } @@ -343,7 +343,7 @@ int __MIPS16e_compute_return_epc(struct pt_regs *regs) case MIPS16e_jal_op: addr += 1; if (__get_user(inst2, addr)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return -EFAULT; } fullinst = ((unsigned)inst.full << 16) | inst2; @@ -829,17 +829,17 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, sigill_dsp: pr_debug("%s: DSP branch but not DSP ASE - sending SIGILL.\n", current->comm); - force_sig(SIGILL, current); + force_sig(SIGILL); return -EFAULT; sigill_r2r6: pr_debug("%s: R2 branch but r2-to-r6 emulator is not present - sending SIGILL.\n", current->comm); - force_sig(SIGILL, current); + force_sig(SIGILL); return -EFAULT; sigill_r6: pr_debug("%s: R6 branch but no MIPSr6 ISA support - sending SIGILL.\n", current->comm); - force_sig(SIGILL, current); + force_sig(SIGILL); return -EFAULT; } EXPORT_SYMBOL_GPL(__compute_return_epc_for_insn); @@ -859,7 +859,7 @@ int __compute_return_epc(struct pt_regs *regs) */ addr = (unsigned int __user *) epc; if (__get_user(insn.word, addr)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return -EFAULT; } @@ -867,7 +867,7 @@ int __compute_return_epc(struct pt_regs *regs) unaligned: printk("%s: unaligned epc - sending SIGBUS.\n", current->comm); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return -EFAULT; } diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 07c941c99e92..81ba1d3c367c 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -220,7 +220,7 @@ static int evaluate_branch_instruction(struct kprobe *p, struct pt_regs *regs, unaligned: pr_notice("%s: unaligned epc - sending SIGBUS.\n", current->comm); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return -EFAULT; } diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index a7c0f97e4b0d..1a08428eedcf 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -177,7 +177,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, if (retval) goto out_unlock; - cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); + cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr); cpumask_and(&mask, &allowed, cpu_active_mask); out_unlock: diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index d75337974ee9..f6efabcb4e92 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -641,7 +641,7 @@ asmlinkage void sys_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); /* * Don't let your children do this ... 
@@ -654,7 +654,7 @@ asmlinkage void sys_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } #endif /* CONFIG_TRAD_SIGNALS */ @@ -678,7 +678,7 @@ asmlinkage void sys_rt_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); if (restore_altstack(&frame->rs_uc.uc_stack)) goto badframe; @@ -694,7 +694,7 @@ asmlinkage void sys_rt_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } #ifdef CONFIG_TRAD_SIGNALS diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index 9a6e58b48bb6..7bd00fad61af 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c @@ -71,7 +71,7 @@ asmlinkage void sysn32_rt_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); if (compat_restore_altstack(&frame->rs_uc.uc_stack)) goto badframe; @@ -87,7 +87,7 @@ asmlinkage void sysn32_rt_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static int setup_rt_frame_n32(void *sig_return, struct ksignal *ksig, diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c index df259618e834..299a7a28ca33 100644 --- a/arch/mips/kernel/signal_o32.c +++ b/arch/mips/kernel/signal_o32.c @@ -171,7 +171,7 @@ asmlinkage void sys32_rt_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); if (compat_restore_altstack(&frame->rs_uc.uc_stack)) goto badframe; @@ -187,7 +187,7 @@ asmlinkage void sys32_rt_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static int setup_rt_frame_32(void *sig_return, struct ksignal *ksig, @@ -273,7 +273,7 @@ asmlinkage void sys32_sigreturn(void) if (sig < 0) goto badframe; else if (sig) - force_sig(sig, current); + force_sig(sig); /* * Don't let your children do this ... 
@@ -286,5 +286,5 @@ asmlinkage void sys32_sigreturn(void) /* Unreached */ badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 0e2dd68ade57..97035e19ad03 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -372,3 +372,4 @@ 431 n32 fsconfig sys_fsconfig 432 n32 fsmount sys_fsmount 433 n32 fspick sys_fspick +434 n32 pidfd_open sys_pidfd_open diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 5eebfa0d155c..d7292722d3b0 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -348,3 +348,4 @@ 431 n64 fsconfig sys_fsconfig 432 n64 fsmount sys_fsmount 433 n64 fspick sys_fspick +434 n64 pidfd_open sys_pidfd_open diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 3cc1374e02d0..dba084c92f14 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -421,3 +421,4 @@ 431 o32 fsconfig sys_fsconfig 432 o32 fsmount sys_fsmount 433 o32 fspick sys_fspick +434 o32 pidfd_open sys_pidfd_open diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index c52766a5b85f..342e41de9d64 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -482,7 +482,7 @@ asmlinkage void do_be(struct pt_regs *regs) goto out; die_if_kernel("Oops", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); out: exception_exit(prev_state); @@ -705,7 +705,7 @@ asmlinkage void do_ov(struct pt_regs *regs) prev_state = exception_enter(); die_if_kernel("Integer overflow", regs); - force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc, current); + force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc); exception_exit(prev_state); } @@ -733,7 +733,7 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, else if (fcr31 & FPU_CSR_INE_X) si_code = FPE_FLTRES; - force_sig_fault(SIGFPE, si_code, fault_addr, tsk); + force_sig_fault_to_task(SIGFPE, si_code, fault_addr, tsk); } int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) @@ -750,7 +750,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) return 1; case SIGBUS: - force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr, current); + force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr); return 1; case SIGSEGV: @@ -761,11 +761,11 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) else si_code = SEGV_MAPERR; up_read(¤t->mm->mmap_sem); - force_sig_fault(SIGSEGV, si_code, fault_addr, current); + force_sig_fault(SIGSEGV, si_code, fault_addr); return 1; default: - force_sig(sig, current); + force_sig(sig); return 1; } } @@ -891,12 +891,12 @@ static void mt_ase_fp_affinity(void) * restricted the allowed set to exclude any CPUs with FPUs, * we'll skip the procedure. 
*/ - if (cpumask_intersects(¤t->cpus_allowed, &mt_fpu_cpumask)) { + if (cpumask_intersects(¤t->cpus_mask, &mt_fpu_cpumask)) { cpumask_t tmask; current->thread.user_cpus_allowed - = current->cpus_allowed; - cpumask_and(&tmask, ¤t->cpus_allowed, + = current->cpus_mask; + cpumask_and(&tmask, ¤t->cpus_mask, &mt_fpu_cpumask); set_cpus_allowed_ptr(current, &tmask); set_thread_flag(TIF_FPUBOUND); @@ -943,11 +943,11 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, die_if_kernel(b, regs); force_sig_fault(SIGFPE, code == BRK_DIVZERO ? FPE_INTDIV : FPE_INTOVF, - (void __user *) regs->cp0_epc, current); + (void __user *) regs->cp0_epc); break; case BRK_BUG: die_if_kernel("Kernel bug detected", regs); - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); break; case BRK_MEMU: /* @@ -962,15 +962,15 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, return; die_if_kernel("Math emu break/trap", regs); - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); break; default: scnprintf(b, sizeof(b), "%s instruction in kernel code", str); die_if_kernel(b, regs); if (si_code) { - force_sig_fault(SIGTRAP, si_code, NULL, current); + force_sig_fault(SIGTRAP, si_code, NULL); } else { - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } } } @@ -1063,7 +1063,7 @@ out: return; out_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -1105,7 +1105,7 @@ out: return; out_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -1191,7 +1191,7 @@ no_r2_instr: if (unlikely(status > 0)) { regs->cp0_epc = old_epc; /* Undo skip-over. */ regs->regs[31] = old31; - force_sig(status, current); + force_sig(status); } out: @@ -1220,7 +1220,7 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action, die_if_kernel("COP2: Unhandled kernel unaligned access or invalid " "instruction", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); return NOTIFY_OK; } @@ -1383,7 +1383,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) if (unlikely(status > 0)) { regs->cp0_epc = old_epc; /* Undo skip-over. */ regs->regs[31] = old31; - force_sig(status, current); + force_sig(status); } break; @@ -1403,7 +1403,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) * emulator too. */ if (raw_cpu_has_fpu || !cpu_has_mips_4_5_64_r2_r6) { - force_sig(SIGILL, current); + force_sig(SIGILL); break; } /* Fall through. 
*/ @@ -1437,7 +1437,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) #else /* CONFIG_MIPS_FP_SUPPORT */ case 1: case 3: - force_sig(SIGILL, current); + force_sig(SIGILL); break; #endif /* CONFIG_MIPS_FP_SUPPORT */ @@ -1464,7 +1464,7 @@ asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr) local_irq_enable(); die_if_kernel("do_msa_fpe invoked from kernel context!", regs); - force_sig(SIGFPE, current); + force_sig(SIGFPE); out: exception_exit(prev_state); } @@ -1477,7 +1477,7 @@ asmlinkage void do_msa(struct pt_regs *regs) prev_state = exception_enter(); if (!cpu_has_msa || test_thread_flag(TIF_32BIT_FPREGS)) { - force_sig(SIGILL, current); + force_sig(SIGILL); goto out; } @@ -1485,7 +1485,7 @@ asmlinkage void do_msa(struct pt_regs *regs) err = enable_restore_fp_context(1); if (err) - force_sig(SIGILL, current); + force_sig(SIGILL); out: exception_exit(prev_state); } @@ -1495,7 +1495,7 @@ asmlinkage void do_mdmx(struct pt_regs *regs) enum ctx_state prev_state; prev_state = exception_enter(); - force_sig(SIGILL, current); + force_sig(SIGILL); exception_exit(prev_state); } @@ -1521,7 +1521,7 @@ asmlinkage void do_watch(struct pt_regs *regs) if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) { mips_read_watch_registers(); local_irq_enable(); - force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL, current); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL); } else { mips_clear_watch_registers(); local_irq_enable(); @@ -1592,7 +1592,7 @@ asmlinkage void do_mt(struct pt_regs *regs) } die_if_kernel("MIPS MT Thread exception in kernel", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); } @@ -1601,7 +1601,7 @@ asmlinkage void do_dsp(struct pt_regs *regs) if (cpu_has_dsp) panic("Unexpected DSP exception"); - force_sig(SIGILL, current); + force_sig(SIGILL); } asmlinkage void do_reserved(struct pt_regs *regs) diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index 76e33f940971..92bd2b0f0548 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -1365,20 +1365,20 @@ fault: return; die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; sigbus: die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return; sigill: die_if_kernel ("Unhandled kernel unaligned access or invalid instruction", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); } /* Recode table from 16-bit register notation to 32-bit GPR. 
*/ @@ -1991,20 +1991,20 @@ fault: return; die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; sigbus: die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return; sigill: die_if_kernel ("Unhandled kernel unaligned access or invalid instruction", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); } static void emulate_load_store_MIPS16e(struct pt_regs *regs, void __user * addr) @@ -2271,20 +2271,20 @@ fault: return; die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; sigbus: die_if_kernel("Unhandled kernel unaligned access", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); return; sigill: die_if_kernel ("Unhandled kernel unaligned access or invalid instruction", regs); - force_sig(SIGILL, current); + force_sig(SIGILL); } asmlinkage void do_ade(struct pt_regs *regs) @@ -2364,7 +2364,7 @@ asmlinkage void do_ade(struct pt_regs *regs) sigbus: die_if_kernel("Kernel unaligned instruction access", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); /* * XXX On return from the signal handler we should advance the epc diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 0369f26ab96d..2cfe839f0b3a 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -123,9 +123,9 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_check_processor_compat(void *rtn) +int kvm_arch_check_processor_compat(void) { - *(int *)rtn = 0; + return 0; } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index f34d7ff5eb60..1e8d335025d7 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -7,7 +7,6 @@ obj-y += cache.o obj-y += context.o obj-y += extable.o obj-y += fault.o -obj-y += gup.o obj-y += init.o obj-y += mmap.o obj-y += page.o diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 3da216988672..33b409391ddb 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -62,8 +62,6 @@ void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size); void (*_dma_cache_wback)(unsigned long start, unsigned long size); void (*_dma_cache_inv)(unsigned long start, unsigned long size); -EXPORT_SYMBOL(_dma_cache_wback_inv); - #endif /* CONFIG_DMA_NONCOHERENT */ /* diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index f9549d2fbea3..ed56c6fa7be2 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -44,33 +44,25 @@ static inline bool cpu_needs_post_dma_flush(struct device *dev) } } -void *arch_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +void arch_dma_prep_coherent(struct page *page, size_t size) { - void *ret; - - ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); - if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) { - dma_cache_wback_inv((unsigned long) ret, size); - ret = (void *)UNCAC_ADDR(ret); - } + dma_cache_wback_inv((unsigned long)page_address(page), size); +} - return ret; +void *uncached_kernel_address(void *addr) +{ + return (void *)(__pa(addr) + UNCAC_BASE); } -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) +void *cached_kernel_address(void *addr) { - if (!(attrs & DMA_ATTR_NON_CONSISTENT)) - cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr); - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, 
attrs); + return __va(addr) - UNCAC_BASE; } long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, dma_addr_t dma_addr) { - unsigned long addr = CAC_ADDR((unsigned long)cpu_addr); - return page_to_pfn(virt_to_page((void *)addr)); + return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr))); } pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 73d8a0f0b810..f589aa8f47d9 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -223,7 +223,7 @@ bad_area_nosemaphore: pr_cont("\n"); } current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; - force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -279,7 +279,7 @@ do_sigbus: #endif current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; tsk->thread.cp0_badvaddr = address; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); return; #ifndef CONFIG_64BIT diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c deleted file mode 100644 index 4c2b4483683c..000000000000 --- a/arch/mips/mm/gup.c +++ /dev/null @@ -1,303 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Lockless get_user_pages_fast for MIPS - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - * Copyright (C) 2011 Ralf Baechle - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/vmstat.h> -#include <linux/highmem.h> -#include <linux/swap.h> -#include <linux/hugetlb.h> - -#include <asm/cpu-features.h> -#include <asm/pgtable.h> - -static inline pte_t gup_get_pte(pte_t *ptep) -{ -#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) - pte_t pte; - -retry: - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - if (unlikely(pte.pte_low != ptep->pte_low)) - goto retry; - - return pte; -#else - return READ_ONCE(*ptep); -#endif -} - -static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - pte_t *ptep = pte_offset_map(&pmd, addr); - do { - pte_t pte = gup_get_pte(ptep); - struct page *page; - - if (!pte_present(pte) || - pte_special(pte) || (write && !pte_write(pte))) { - pte_unmap(ptep); - return 0; - } - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - get_page(page); - SetPageReferenced(page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - - pte_unmap(ptep - 1); - return 1; -} - -static inline void get_head_page_multiple(struct page *page, int nr) -{ - VM_BUG_ON(page != compound_head(page)); - VM_BUG_ON(page_count(page) == 0); - page_ref_add(page, nr); - SetPageReferenced(page); -} - -static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - pte_t pte = *(pte_t *)&pmd; - struct page *head, *page; - int refs; - - if (write && !pte_write(pte)) - return 0; - /* hugepages are never "special" */ - VM_BUG_ON(pte_special(pte)); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - get_head_page_multiple(head, refs); - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) 
-{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (unlikely(pmd_huge(pmd))) { - if (!gup_huge_pmd(pmd, addr, next, write, pages,nr)) - return 0; - } else { - if (!gup_pte_range(pmd, addr, next, write, pages,nr)) - return 0; - } - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - pte_t pte = *(pte_t *)&pud; - struct page *head, *page; - int refs; - - if (write && !pte_write(pte)) - return 0; - /* hugepages are never "special" */ - VM_BUG_ON(pte_special(pte)); - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - get_head_page_multiple(head, refs); - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (unlikely(pud_huge(pud))) { - if (!gup_huge_pud(pud, addr, next, write, pages,nr)) - return 0; - } else { - if (!gup_pmd_range(pud, addr, next, write, pages,nr)) - return 0; - } - } while (pudp++, addr = next, addr != end); - - return 1; -} - -/* - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall - * back to the regular GUP. - * Note a difference with get_user_pages_fast: this always returns the - * number of pages pinned, 0 if no pages were pinned. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (unlikely(!access_ok((void __user *)start, len))) - return 0; - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch - * size will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the page and take a ref on it. - */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @gup_flags: flags modifying pin behaviour - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. 
- * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. - */ -int get_user_pages_fast(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int ret, nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - - end = start + len; - if (end < start || cpu_has_dc_aliases) - goto slow_irqon; - - /* XXX: batch / limit 'nr' */ - local_irq_disable(); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, - pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - local_irq_enable(); - - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; -slow: - local_irq_enable(); - -slow_irqon: - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - pages, gup_flags); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - return ret; -} diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 50ee7213b432..d79f2b432318 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -203,7 +203,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) bool __virt_addr_valid(const volatile void *kaddr) { - unsigned long vaddr = (unsigned long)vaddr; + unsigned long vaddr = (unsigned long)kaddr; if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE)) return false; diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 65b6e85447b1..144ceb0fba88 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -391,6 +391,7 @@ static struct work_registers build_get_work_registers(u32 **p) static void build_restore_work_registers(u32 **p) { if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); return; } @@ -668,10 +669,12 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, uasm_i_mtc0(p, 0, C0_PAGEMASK); uasm_il_b(p, r, lid); } - if (scratch_reg >= 0) + if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - else + } else { UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } } else { /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { @@ -938,10 +941,12 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_jr(p, ptr); if (mode == refill_scratch) { - if (scratch_reg >= 0) + if (scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - else + } else { UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } } else { uasm_i_nop(p); } @@ -1258,6 +1263,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ if (c0_scratch_reg >= 0) { + uasm_i_ehb(p); UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); build_tlb_write_entry(p, l, r, tlb_random); uasm_l_leave(l, *p); @@ -1603,15 +1609,17 @@ static void build_setup_pgd(void) uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); uasm_l_tlbl_goaround1(&l, p); UASM_i_SLL(&p, a0, a0, 11); - 
uasm_i_jr(&p, 31); UASM_i_MTC0(&p, a0, C0_CONTEXT); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); } else { /* PGD in c0_KScratch */ - uasm_i_jr(&p, 31); if (cpu_has_ldpte) UASM_i_MTC0(&p, a0, C0_PWBASE); else UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); } #else #ifdef CONFIG_SMP @@ -1625,13 +1633,16 @@ static void build_setup_pgd(void) UASM_i_LA_mostly(&p, a2, pgdc); UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); #endif /* SMP */ - uasm_i_jr(&p, 31); /* if pgd_reg is allocated, save PGD also to scratch register */ - if (pgd_reg != -1) + if (pgd_reg != -1) { UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); - else + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); + } else { + uasm_i_jr(&p, 31); uasm_i_nop(&p); + } #endif if (p >= (u32 *)tlbmiss_handler_setup_pgd_end) panic("tlbmiss_handler_setup_pgd space exceeded"); diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c index 34bb9801d5ff..dc0110a607a5 100644 --- a/arch/mips/sgi-ip22/ip22-berr.c +++ b/arch/mips/sgi-ip22/ip22-berr.c @@ -98,7 +98,7 @@ void ip22_be_interrupt(int irq) field, regs->cp0_epc, field, regs->regs[31]); /* Assume it would be too dangerous to continue ... */ die_if_kernel("Oops", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } static int ip22_be_handler(struct pt_regs *regs, int is_fixup) diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 082541d33161..c0cf7baee36d 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -462,7 +462,7 @@ void ip22_be_interrupt(int irq) if (ip28_be_interrupt(regs) != MIPS_BE_DISCARD) { /* Assume it would be too dangerous to continue ... */ die_if_kernel("Oops", regs); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } else if (debug_be_interrupt) show_regs(regs); } diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index 83efe03d5c60..73ad29b180fb 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -74,7 +74,7 @@ int ip27_be_handler(struct pt_regs *regs, int is_fixup) show_regs(regs); dump_tlb_all(); while(1); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } void __init ip27_be_init(void) diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index c1f12a9cf305..c860f95ab7ed 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -29,7 +29,7 @@ static int ip32_be_handler(struct pt_regs *regs, int is_fixup) show_regs(regs); dump_tlb_all(); while(1); - force_sig(SIGBUS, current); + force_sig(SIGBUS); } void __init ip32_be_init(void) diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 3299e287a477..fbd68329737f 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -1,18 +1,20 @@ # SPDX-License-Identifier: GPL-2.0-only # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. 
# config NDS32 def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_WANT_FRAME_POINTERS if FTRACE select CLKSRC_MMIO select CLONE_BACKWARDS select COMMON_CLK + select DMA_DIRECT_REMAP select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 14dab5ad88ef..ccdca7142020 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -2,8 +2,6 @@ LDFLAGS_vmlinux := --no-undefined -X OBJCOPYFLAGS := -O binary -R .note -R .note.gnu.build-id -R .comment -S -KBUILD_DEFCONFIG := defconfig - ifdef CONFIG_FUNCTION_TRACER arch-y += -malways-save-lp -mno-relax endif diff --git a/arch/nds32/configs/defconfig b/arch/nds32/configs/defconfig index 65ce9259081b..40313a635075 100644 --- a/arch/nds32/configs/defconfig +++ b/arch/nds32/configs/defconfig @@ -92,6 +92,7 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y CONFIG_GDB_SCRIPTS=y CONFIG_READABLE_ASM=y +CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h index 3cbc749c79aa..e78b43d8389f 100644 --- a/arch/nds32/include/asm/pgalloc.h +++ b/arch/nds32/include/asm/pgalloc.h @@ -9,6 +9,9 @@ #include <asm/tlbflush.h> #include <asm/proc-fns.h> +#define __HAVE_ARCH_PTE_ALLOC_ONE +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + /* * Since we have only two-level page tables, these are trivial */ @@ -22,22 +25,11 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd); #define check_pgt_cache() do { } while (0) -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte; - - pte = - (pte_t *) __get_free_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL | - __GFP_ZERO); - - return pte; -} - static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { pgtable_t pte; - pte = alloc_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO, 0); + pte = __pte_alloc_one(mm, GFP_PGTABLE_USER); if (pte) cpu_dcache_wb_page((unsigned long)page_address(pte)); @@ -45,21 +37,6 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm) } /* - * Free one PTE table. - */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t * pte) -{ - if (pte) { - free_page((unsigned long)pte); - } -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - __free_page(pte); -} - -/* * Populate the pmdp entry with a pointer to the pte. This pmd is part * of the mm address space. * diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c index d0dbd4fe9645..490e3720d694 100644 --- a/arch/nds32/kernel/dma.c +++ b/arch/nds32/kernel/dma.c @@ -3,327 +3,13 @@ #include <linux/types.h> #include <linux/mm.h> -#include <linux/string.h> #include <linux/dma-noncoherent.h> -#include <linux/io.h> #include <linux/cache.h> #include <linux/highmem.h> -#include <linux/slab.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/proc-fns.h> -/* - * This is the page table (2MB) covering uncached, DMA consistent allocations - */ -static pte_t *consistent_pte; -static DEFINE_RAW_SPINLOCK(consistent_lock); - -/* - * VM region handling support. - * - * This should become something generic, handling VM region allocations for - * vmalloc and similar (ioremap, module space, etc). 
- * - * I envisage vmalloc()'s supporting vm_struct becoming: - * - * struct vm_struct { - * struct vm_region region; - * unsigned long flags; - * struct page **pages; - * unsigned int nr_pages; - * unsigned long phys_addr; - * }; - * - * get_vm_area() would then call vm_region_alloc with an appropriate - * struct vm_region head (eg): - * - * struct vm_region vmalloc_head = { - * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), - * .vm_start = VMALLOC_START, - * .vm_end = VMALLOC_END, - * }; - * - * However, vmalloc_head.vm_start is variable (typically, it is dependent on - * the amount of RAM found at boot time.) I would imagine that get_vm_area() - * would have to initialise this each time prior to calling vm_region_alloc(). - */ -struct arch_vm_region { - struct list_head vm_list; - unsigned long vm_start; - unsigned long vm_end; - struct page *vm_pages; -}; - -static struct arch_vm_region consistent_head = { - .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), - .vm_start = CONSISTENT_BASE, - .vm_end = CONSISTENT_END, -}; - -static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head, - size_t size, int gfp) -{ - unsigned long addr = head->vm_start, end = head->vm_end - size; - unsigned long flags; - struct arch_vm_region *c, *new; - - new = kmalloc(sizeof(struct arch_vm_region), gfp); - if (!new) - goto out; - - raw_spin_lock_irqsave(&consistent_lock, flags); - - list_for_each_entry(c, &head->vm_list, vm_list) { - if ((addr + size) < addr) - goto nospc; - if ((addr + size) <= c->vm_start) - goto found; - addr = c->vm_end; - if (addr > end) - goto nospc; - } - -found: - /* - * Insert this entry _before_ the one we found. - */ - list_add_tail(&new->vm_list, &c->vm_list); - new->vm_start = addr; - new->vm_end = addr + size; - - raw_spin_unlock_irqrestore(&consistent_lock, flags); - return new; - -nospc: - raw_spin_unlock_irqrestore(&consistent_lock, flags); - kfree(new); -out: - return NULL; -} - -static struct arch_vm_region *vm_region_find(struct arch_vm_region *head, - unsigned long addr) -{ - struct arch_vm_region *c; - - list_for_each_entry(c, &head->vm_list, vm_list) { - if (c->vm_start == addr) - goto out; - } - c = NULL; -out: - return c; -} - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, unsigned long attrs) -{ - struct page *page; - struct arch_vm_region *c; - unsigned long order; - u64 mask = ~0ULL, limit; - pgprot_t prot = pgprot_noncached(PAGE_KERNEL); - - if (!consistent_pte) { - pr_err("%s: not initialized\n", __func__); - dump_stack(); - return NULL; - } - - if (dev) { - mask = dev->coherent_dma_mask; - - /* - * Sanity check the DMA mask - it must be non-zero, and - * must be able to be satisfied by a DMA allocation. - */ - if (mask == 0) { - dev_warn(dev, "coherent DMA mask is unset\n"); - goto no_page; - } - - } - - /* - * Sanity check the allocation size. - */ - size = PAGE_ALIGN(size); - limit = (mask + 1) & ~mask; - if ((limit && size >= limit) || - size >= (CONSISTENT_END - CONSISTENT_BASE)) { - pr_warn("coherent allocation too big " - "(requested %#x mask %#llx)\n", size, mask); - goto no_page; - } - - order = get_order(size); - - if (mask != 0xffffffff) - gfp |= GFP_DMA; - - page = alloc_pages(gfp, order); - if (!page) - goto no_page; - - /* - * Invalidate any data that might be lurking in the - * kernel direct-mapped region for device DMA. 
- */ - { - unsigned long kaddr = (unsigned long)page_address(page); - memset(page_address(page), 0, size); - cpu_dma_wbinval_range(kaddr, kaddr + size); - } - - /* - * Allocate a virtual address in the consistent mapping region. - */ - c = vm_region_alloc(&consistent_head, size, - gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); - if (c) { - pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start); - struct page *end = page + (1 << order); - - c->vm_pages = page; - - /* - * Set the "dma handle" - */ - *handle = page_to_phys(page); - - do { - BUG_ON(!pte_none(*pte)); - - /* - * x86 does not mark the pages reserved... - */ - SetPageReserved(page); - set_pte(pte, mk_pte(page, prot)); - page++; - pte++; - } while (size -= PAGE_SIZE); - - /* - * Free the otherwise unused pages. - */ - while (page < end) { - __free_page(page); - page++; - } - - return (void *)c->vm_start; - } - - if (page) - __free_pages(page, order); -no_page: - *handle = ~0; - return NULL; -} - -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, unsigned long attrs) -{ - struct arch_vm_region *c; - unsigned long flags, addr; - pte_t *ptep; - - size = PAGE_ALIGN(size); - - raw_spin_lock_irqsave(&consistent_lock, flags); - - c = vm_region_find(&consistent_head, (unsigned long)cpu_addr); - if (!c) - goto no_area; - - if ((c->vm_end - c->vm_start) != size) { - pr_err("%s: freeing wrong coherent size (%ld != %d)\n", - __func__, c->vm_end - c->vm_start, size); - dump_stack(); - size = c->vm_end - c->vm_start; - } - - ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); - addr = c->vm_start; - do { - pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); - unsigned long pfn; - - ptep++; - addr += PAGE_SIZE; - - if (!pte_none(pte) && pte_present(pte)) { - pfn = pte_pfn(pte); - - if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - - /* - * x86 does not mark the pages reserved... - */ - ClearPageReserved(page); - - __free_page(page); - continue; - } - } - - pr_crit("%s: bad page in kernel page table\n", __func__); - } while (size -= PAGE_SIZE); - - flush_tlb_kernel_range(c->vm_start, c->vm_end); - - list_del(&c->vm_list); - - raw_spin_unlock_irqrestore(&consistent_lock, flags); - - kfree(c); - return; - -no_area: - raw_spin_unlock_irqrestore(&consistent_lock, flags); - pr_err("%s: trying to free invalid coherent area: %p\n", - __func__, cpu_addr); - dump_stack(); -} - -/* - * Initialise the consistent memory allocation. - */ -static int __init consistent_init(void) -{ - pgd_t *pgd; - pmd_t *pmd; - pte_t *pte; - int ret = 0; - - do { - pgd = pgd_offset(&init_mm, CONSISTENT_BASE); - pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE); - if (!pmd) { - pr_err("%s: no pmd tables\n", __func__); - ret = -ENOMEM; - break; - } - /* The first level mapping may be created in somewhere. - * It's not necessary to warn here. 
*/ - /* WARN_ON(!pmd_none(*pmd)); */ - - pte = pte_alloc_kernel(pmd, CONSISTENT_BASE); - if (!pte) { - ret = -ENOMEM; - break; - } - - consistent_pte = pte; - } while (0); - - return ret; -} - -core_initcall(consistent_init); - static inline void cache_op(phys_addr_t paddr, size_t size, void (*fn)(unsigned long start, unsigned long end)) { @@ -389,3 +75,14 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, BUG(); } } + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + cache_op(page_to_phys(page), size, cpu_dma_wbinval_range); +} + +static int __init atomic_pool_init(void) +{ + return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL)); +} +postcore_initcall(atomic_pool_init); diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c index cf0b8760f261..62bdafbc53f4 100644 --- a/arch/nds32/kernel/fpu.c +++ b/arch/nds32/kernel/fpu.c @@ -243,7 +243,7 @@ inline void handle_fpu_exception(struct pt_regs *regs) } force_sig_fault(si_signo, si_code, - (void __user *)instruction_pointer(regs), current); + (void __user *)instruction_pointer(regs)); done: own_fpu(); } diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c index 5f7660aa2d68..fe61513982b4 100644 --- a/arch/nds32/kernel/signal.c +++ b/arch/nds32/kernel/signal.c @@ -163,7 +163,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return regs->uregs[0]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 5aa7c17da27a..f4d386b52622 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -205,7 +205,7 @@ int bad_syscall(int n, struct pt_regs *regs) } force_sig_fault(SIGILL, ILL_ILLTRP, - (void __user *)instruction_pointer(regs) - 4, current); + (void __user *)instruction_pointer(regs) - 4); die_if_kernel("Oops - bad syscall", regs, n); return regs->uregs[0]; } @@ -255,14 +255,15 @@ void __init early_trap_init(void) cpu_cache_wbinval_page(base, true); } -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code, int si_code) +static void send_sigtrap(struct pt_regs *regs, int error_code, int si_code) { + struct task_struct *tsk = current; + tsk->thread.trap_no = ENTRY_DEBUG_RELATED; tsk->thread.error_code = error_code; force_sig_fault(SIGTRAP, si_code, - (void __user *)instruction_pointer(regs), tsk); + (void __user *)instruction_pointer(regs)); } void do_debug_trap(unsigned long entry, unsigned long addr, @@ -274,7 +275,7 @@ void do_debug_trap(unsigned long entry, unsigned long addr, if (user_mode(regs)) { /* trap_signal */ - send_sigtrap(current, regs, 0, TRAP_BRKPT); + send_sigtrap(regs, 0, TRAP_BRKPT); } else { /* kernel_trap */ if (!fixup_exception(regs)) @@ -288,7 +289,7 @@ void unhandled_interruption(struct pt_regs *regs) show_regs(regs); if (!user_mode(regs)) do_exit(SIGKILL); - force_sig(SIGKILL, current); + force_sig(SIGKILL); } void unhandled_exceptions(unsigned long entry, unsigned long addr, @@ -299,7 +300,7 @@ void unhandled_exceptions(unsigned long entry, unsigned long addr, show_regs(regs); if (!user_mode(regs)) do_exit(SIGKILL); - force_sig(SIGKILL, current); + force_sig(SIGKILL); } extern int do_page_fault(unsigned long entry, unsigned long addr, @@ -326,7 +327,7 @@ void do_revinsn(struct pt_regs *regs) show_regs(regs); if (!user_mode(regs)) do_exit(SIGILL); - force_sig(SIGILL, current); + force_sig(SIGILL); } #ifdef CONFIG_ALIGNMENT_TRAP diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 
68d5f2a27f38..064ae5d2159d 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -271,7 +271,7 @@ bad_area_nosemaphore: tsk->thread.address = addr; tsk->thread.error_code = error_code; tsk->thread.trap_no = entry; - force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk); + force_sig_fault(SIGSEGV, si_code, (void __user *)addr); return; } @@ -340,7 +340,7 @@ do_sigbus: tsk->thread.address = addr; tsk->thread.error_code = error_code; tsk->thread.trap_no = entry; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr); return; diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 26a9c760a98b..44b5da37e8bd 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -4,6 +4,7 @@ config NIOS2 select ARCH_32BIT_OFF_T select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_UNCACHED_SEGMENT select ARCH_NO_SWAP select TIMER_OF select GENERIC_ATOMIC64 diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug index f1da8a7b17ff..a8bc06e96ef5 100644 --- a/arch/nios2/Kconfig.debug +++ b/arch/nios2/Kconfig.debug @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_SUPPORT - def_bool y - config EARLY_PRINTK bool "Activate early kernel debugging" default y diff --git a/arch/nios2/configs/10m50_defconfig b/arch/nios2/configs/10m50_defconfig index 7977ab7e2ca6..1137ef2ed3b0 100644 --- a/arch/nios2/configs/10m50_defconfig +++ b/arch/nios2/configs/10m50_defconfig @@ -35,7 +35,6 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_FW_LOADER is not set diff --git a/arch/nios2/configs/3c120_defconfig b/arch/nios2/configs/3c120_defconfig index ceb97cd85ac1..a0f160ba7598 100644 --- a/arch/nios2/configs/3c120_defconfig +++ b/arch/nios2/configs/3c120_defconfig @@ -37,7 +37,6 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_FW_LOADER is not set diff --git a/arch/nios2/include/asm/page.h b/arch/nios2/include/asm/page.h index f1fbdc47bdaf..79fcac61f6ef 100644 --- a/arch/nios2/include/asm/page.h +++ b/arch/nios2/include/asm/page.h @@ -101,12 +101,6 @@ static inline bool pfn_valid(unsigned long pfn) # define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -# define UNCAC_ADDR(addr) \ - ((void *)((unsigned)(addr) | CONFIG_NIOS2_IO_REGION_BASE)) -# define CAC_ADDR(addr) \ - ((void *)(((unsigned)(addr) & ~CONFIG_NIOS2_IO_REGION_BASE) | \ - CONFIG_NIOS2_KERNEL_REGION_BASE)) - #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 3a149ead1207..4bc8cf72067e 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -12,6 +12,8 @@ #include <linux/mm.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -37,41 +39,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_pages((unsigned long)pgd, PGD_ORDER); } -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte; - - pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER); - - return pte; -} - 
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_pages(GFP_KERNEL, PTE_ORDER); - if (pte) { - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - clear_highpage(pte); - } - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_pages((unsigned long)pte, PTE_ORDER); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_page_dtor(pte); - __free_pages(pte, PTE_ORDER); -} - #define __pte_free_tlb(tlb, pte, addr) \ do { \ pgtable_page_dtor(pte); \ diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index 4a81876b6086..a42dd09c6578 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -120,7 +120,7 @@ asmlinkage int do_rt_sigreturn(struct switch_stack *sw) return rval; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -211,7 +211,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, return 0; give_sigsegv: - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return -EFAULT; } diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 3bc3cd22b750..486db793923c 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -26,7 +26,7 @@ static DEFINE_SPINLOCK(die_lock); static void _send_sig(int signo, int code, unsigned long addr) { - force_sig_fault(signo, code, (void __user *) addr, current); + force_sig_fault(signo, code, (void __user *) addr); } void die(const char *str, struct pt_regs *regs, long err) diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c index 4af9e5b5ba1c..9cb238664584 100644 --- a/arch/nios2/mm/dma-mapping.c +++ b/arch/nios2/mm/dma-mapping.c @@ -60,32 +60,28 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, } } -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) +void arch_dma_prep_coherent(struct page *page, size_t size) { - void *ret; + unsigned long start = (unsigned long)page_address(page); - /* optimized page clearing */ - gfp |= __GFP_ZERO; + flush_dcache_range(start, start + size); +} - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; +void *uncached_kernel_address(void *ptr) +{ + unsigned long addr = (unsigned long)ptr; - ret = (void *) __get_free_pages(gfp, get_order(size)); - if (ret != NULL) { - *dma_handle = virt_to_phys(ret); - flush_dcache_range((unsigned long) ret, - (unsigned long) ret + size); - ret = UNCAC_ADDR(ret); - } + addr |= CONFIG_NIOS2_IO_REGION_BASE; - return ret; + return (void *)ptr; } -void arch_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) +void *cached_kernel_address(void *ptr) { - unsigned long addr = (unsigned long) CAC_ADDR((unsigned long) vaddr); + unsigned long addr = (unsigned long)ptr; + + addr &= ~CONFIG_NIOS2_IO_REGION_BASE; + addr |= CONFIG_NIOS2_KERNEL_REGION_BASE; - free_pages(addr, get_order(size)); + return (void *)ptr; } diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 7cfb20555b10..bf326f0edd2f 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. 
# config OPENRISC diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 43e340c4cd9c..b41a79fcdbd9 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -94,15 +94,13 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, va = (unsigned long)page; - if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) { - /* - * We need to iterate through the pages, clearing the dcache for - * them and setting the cache-inhibit bit. - */ - if (walk_page_range(va, va + size, &walk)) { - free_pages_exact(page, size); - return NULL; - } + /* + * We need to iterate through the pages, clearing the dcache for + * them and setting the cache-inhibit bit. + */ + if (walk_page_range(va, va + size, &walk)) { + free_pages_exact(page, size); + return NULL; } return (void *)va; @@ -118,10 +116,8 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr, .mm = &init_mm }; - if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) { - /* walk_page_range shouldn't be able to fail here */ - WARN_ON(walk_page_range(va, va + size, &walk)); - } + /* walk_page_range shouldn't be able to fail here */ + WARN_ON(walk_page_range(va, va + size, &walk)); free_pages_exact(vaddr, size); } diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 801cad03a4c7..4f0754874d78 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -95,7 +95,7 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs) return regs->gpr[11]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index e859bfb118a6..932a8ec2b520 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -244,7 +244,7 @@ void __init trap_init(void) asmlinkage void do_trap(struct pt_regs *regs, unsigned long address) { - force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address, current); + force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address); regs->pc += 4; } @@ -253,7 +253,7 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) { if (user_mode(regs)) { /* Send a SIGBUS */ - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address); } else { printk("KERNEL: Unaligned Access 0x%.8lx\n", address); show_registers(regs); @@ -266,7 +266,7 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address) { if (user_mode(regs)) { /* Send a SIGBUS */ - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } else { /* Kernel mode */ printk("KERNEL: Bus error (SIGBUS) 0x%.8lx\n", address); show_registers(regs); @@ -371,7 +371,7 @@ static inline void simulate_lwa(struct pt_regs *regs, unsigned long address, if (get_user(value, lwa_addr)) { if (user_mode(regs)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } @@ -418,7 +418,7 @@ static inline void simulate_swa(struct pt_regs *regs, unsigned long address, if (put_user(regs->gpr[rb], vaddr)) { if (user_mode(regs)) { - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } @@ -461,7 +461,7 @@ asmlinkage void do_illegal_instruction(struct pt_regs *regs, if (user_mode(regs)) { /* Send a SIGILL */ - force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address, current); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address); } else { /* Kernel mode */ printk("KERNEL: Illegal instruction (SIGILL) 0x%.8lx\n", 
address); diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 9eee5bf3db27..5d4d3a9691d0 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -209,7 +209,7 @@ bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { - force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -274,7 +274,7 @@ do_sigbus: * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 4860efa91d7b..42875ff15671 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -59,6 +59,8 @@ config PARISC select HAVE_ARCH_KGDB select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_DYNAMIC_FTRACE if $(cc-option,-fpatchable-function-entry=1,1) + select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE help The PA-RISC microprocessor is designed by Hewlett-Packard and used diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index c19af26febe6..8acb8fa1f8d6 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -47,6 +47,24 @@ ifneq ($(SUBARCH),$(UTS_MACHINE)) endif endif +ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_64BIT +NOP_COUNT := 8 +else +NOP_COUNT := 5 +endif + +export CC_USING_RECORD_MCOUNT:=1 +export CC_USING_PATCHABLE_FUNCTION_ENTRY:=1 + +KBUILD_AFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 +KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \ + -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT) + +CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1))) +KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/parisc/kernel/module.lds +endif + OBJCOPY_FLAGS =-O binary -R .note -R .comment -S cflags-y := -pipe @@ -102,8 +120,8 @@ PALO := $(shell if (which palo 2>&1); then : ; \ elif [ -x /sbin/palo ]; then echo /sbin/palo; \ fi) -PALOCONF := $(shell if [ -f $(src)/palo.conf ]; then echo $(src)/palo.conf; \ - else echo $(obj)/palo.conf; \ +PALOCONF := $(shell if [ -f $(srctree)/palo.conf ]; then echo $(srctree)/palo.conf; \ + else echo $(objtree)/palo.conf; \ fi) palo lifimage: vmlinuz @@ -113,8 +131,8 @@ palo lifimage: vmlinuz false; \ fi @if test ! 
-f "$(PALOCONF)"; then \ - cp $(src)/arch/parisc/defpalo.conf $(obj)/palo.conf; \ - echo 'A generic palo config file ($(obj)/palo.conf) has been created for you.'; \ + cp $(srctree)/arch/parisc/defpalo.conf $(objtree)/palo.conf; \ + echo 'A generic palo config file ($(objree)/palo.conf) has been created for you.'; \ echo 'You should check it and re-run "make palo".'; \ echo 'WARNING: the "lifimage" file is now placed in this directory by default!'; \ false; \ @@ -144,10 +162,10 @@ vmlinuz: vmlinux endif install: - $(CONFIG_SHELL) $(src)/arch/parisc/install.sh \ + $(CONFIG_SHELL) $(srctree)/arch/parisc/install.sh \ $(KERNELRELEASE) vmlinux System.map "$(INSTALL_PATH)" zinstall: - $(CONFIG_SHELL) $(src)/arch/parisc/install.sh \ + $(CONFIG_SHELL) $(srctree)/arch/parisc/install.sh \ $(KERNELRELEASE) vmlinuz System.map "$(INSTALL_PATH)" CLEAN_FILES += lifimage diff --git a/arch/parisc/configs/a500_defconfig b/arch/parisc/configs/a500_defconfig index a8859496b0b9..3335734bfadd 100644 --- a/arch/parisc/configs/a500_defconfig +++ b/arch/parisc/configs/a500_defconfig @@ -166,6 +166,7 @@ CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m CONFIG_DEBUG_FS=y +CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/parisc/configs/b180_defconfig b/arch/parisc/configs/b180_defconfig index 0cae9664bf67..07fde5bd6974 100644 --- a/arch/parisc/configs/b180_defconfig +++ b/arch/parisc/configs/b180_defconfig @@ -90,6 +90,7 @@ CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m +CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/parisc/configs/c3000_defconfig b/arch/parisc/configs/c3000_defconfig index 6c29b841735c..64d45a8b6ca0 100644 --- a/arch/parisc/configs/c3000_defconfig +++ b/arch/parisc/configs/c3000_defconfig @@ -139,6 +139,7 @@ CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_UTF8=m CONFIG_DEBUG_FS=y +CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MUTEXES=y diff --git a/arch/parisc/configs/default_defconfig b/arch/parisc/configs/default_defconfig index 6a91cc2623e8..5b877ca34ebf 100644 --- a/arch/parisc/configs/default_defconfig +++ b/arch/parisc/configs/default_defconfig @@ -183,6 +183,7 @@ CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=y CONFIG_DEBUG_FS=y +CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h index 42b2c75a1645..958c0aa5dbb2 100644 --- a/arch/parisc/include/asm/ftrace.h +++ b/arch/parisc/include/asm/ftrace.h @@ -5,12 +5,23 @@ #ifndef __ASSEMBLY__ extern void mcount(void); -#define MCOUNT_INSN_SIZE 4 - +#define MCOUNT_ADDR ((unsigned long)mcount) +#define MCOUNT_INSN_SIZE 4 +#define CC_USING_NOP_MCOUNT extern unsigned long sys_call_table[]; extern unsigned long return_address(unsigned int); +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_caller(void); + +struct dyn_arch_ftrace { +}; + +unsigned long ftrace_call_adjust(unsigned long addr); + +#endif + #define ftrace_return_address(n) return_address(n) #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/patch.h b/arch/parisc/include/asm/patch.h index 685b58a13968..400d84c6e504 100644 --- a/arch/parisc/include/asm/patch.h +++ b/arch/parisc/include/asm/patch.h @@ -4,8 +4,10 @@ /* stop machine and patch kernel text */ void patch_text(void *addr, unsigned int insn); +void 
patch_text_multiple(void *addr, u32 *insn, unsigned int len); /* patch kernel text with machine already stopped (e.g. in kgdb) */ -void __patch_text(void *addr, unsigned int insn); +void __patch_text(void *addr, u32 insn); +void __patch_text_multiple(void *addr, u32 *insn, unsigned int len); #endif diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index ea75cc966dae..4f2059a50fae 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -10,6 +10,8 @@ #include <asm/cache.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + /* Allocate the top level pgd (page directory) * * Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we @@ -122,37 +124,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) pmd_populate_kernel(mm, pmd, page_address(pte_page)) #define pmd_pgtable(pmd) pmd_page(pmd) -static inline pgtable_t -pte_alloc_one(struct mm_struct *mm) -{ - struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (!page) - return NULL; - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; -} - -static inline pte_t * -pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_page_dtor(pte); - pte_free_kernel(mm, page_address(pte)); -} - #define check_pgt_cache() do { } while (0) #endif diff --git a/arch/parisc/include/asm/psw.h b/arch/parisc/include/asm/psw.h index 76c301146c31..46921ffcc407 100644 --- a/arch/parisc/include/asm/psw.h +++ b/arch/parisc/include/asm/psw.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PARISC_PSW_H - +#define _PARISC_PSW_H #define PSW_I 0x00000001 #define PSW_D 0x00000002 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 66c5dd245ac7..10173c32195e 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -114,6 +114,8 @@ #define SO_RCVTIMEO_NEW 0x4040 #define SO_SNDTIMEO_NEW 0x4041 +#define SO_DETACH_REUSEPORT_BPF 0x4042 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index fc0df5c44468..c232266b517c 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -14,10 +14,11 @@ obj-y := cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \ ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities -CFLAGS_REMOVE_ftrace.o = -pg -CFLAGS_REMOVE_cache.o = -pg -CFLAGS_REMOVE_perf.o = -pg -CFLAGS_REMOVE_unwind.o = -pg +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_cache.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_perf.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_unwind.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) endif obj-$(CONFIG_SMP) += smp.o diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 89c801c2b5d1..3e430590c1e1 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -2012,6 +2012,70 @@ ftrace_stub: #endif ENDPROC_CFI(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE + +#ifdef CONFIG_64BIT +#define FTRACE_FRAME_SIZE (2*FRAME_SIZE) +#else +#define FTRACE_FRAME_SIZE FRAME_SIZE +#endif +ENTRY_CFI(ftrace_caller, caller,frame=FTRACE_FRAME_SIZE,CALLS,SAVE_RP,SAVE_SP) +ftrace_caller: + .global 
ftrace_caller + + STREG %r3, -FTRACE_FRAME_SIZE+1*REG_SZ(%sp) + ldo -FTRACE_FRAME_SIZE(%sp), %r3 + STREG %rp, -RP_OFFSET(%r3) + + /* Offset 0 is already allocated for %r1 */ + STREG %r23, 2*REG_SZ(%r3) + STREG %r24, 3*REG_SZ(%r3) + STREG %r25, 4*REG_SZ(%r3) + STREG %r26, 5*REG_SZ(%r3) + STREG %r28, 6*REG_SZ(%r3) + STREG %r29, 7*REG_SZ(%r3) +#ifdef CONFIG_64BIT + STREG %r19, 8*REG_SZ(%r3) + STREG %r20, 9*REG_SZ(%r3) + STREG %r21, 10*REG_SZ(%r3) + STREG %r22, 11*REG_SZ(%r3) + STREG %r27, 12*REG_SZ(%r3) + STREG %r31, 13*REG_SZ(%r3) + loadgp + ldo -16(%sp),%r29 +#endif + LDREG 0(%r3), %r25 + copy %rp, %r26 + ldo -8(%r25), %r25 + b,l ftrace_function_trampoline, %rp + copy %r3, %r24 + + LDREG -RP_OFFSET(%r3), %rp + LDREG 2*REG_SZ(%r3), %r23 + LDREG 3*REG_SZ(%r3), %r24 + LDREG 4*REG_SZ(%r3), %r25 + LDREG 5*REG_SZ(%r3), %r26 + LDREG 6*REG_SZ(%r3), %r28 + LDREG 7*REG_SZ(%r3), %r29 +#ifdef CONFIG_64BIT + LDREG 8*REG_SZ(%r3), %r19 + LDREG 9*REG_SZ(%r3), %r20 + LDREG 10*REG_SZ(%r3), %r21 + LDREG 11*REG_SZ(%r3), %r22 + LDREG 12*REG_SZ(%r3), %r27 + LDREG 13*REG_SZ(%r3), %r31 +#endif + LDREG 1*REG_SZ(%r3), %r3 + + LDREGM -FTRACE_FRAME_SIZE(%sp), %r1 + /* Adjust return point to jump back to beginning of traced function */ + ldo -4(%r1), %r1 + bv,n (%r1) + +ENDPROC_CFI(ftrace_caller) + +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER .align 8 ENTRY_CFI(return_to_handler, caller,frame=FRAME_SIZE) diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index a28f915993b1..d784ccdd8fef 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -7,17 +7,17 @@ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> * * future possible enhancements: - * - add CONFIG_DYNAMIC_FTRACE * - add CONFIG_STACK_TRACER */ #include <linux/init.h> #include <linux/ftrace.h> +#include <linux/uaccess.h> #include <asm/assembly.h> #include <asm/sections.h> #include <asm/ftrace.h> - +#include <asm/patch.h> #define __hot __attribute__ ((__section__ (".text.hot"))) @@ -50,13 +50,11 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, unsigned long self_addr, unsigned long org_sp_gr3) { - extern ftrace_func_t ftrace_trace_function; /* depends on CONFIG_DYNAMIC_FTRACE */ - - if (ftrace_trace_function != ftrace_stub) { - /* struct ftrace_ops *op, struct pt_regs *regs); */ - ftrace_trace_function(parent, self_addr, NULL, NULL); - return; - } +#ifndef CONFIG_DYNAMIC_FTRACE + extern ftrace_func_t ftrace_trace_function; +#endif + if (ftrace_trace_function != ftrace_stub) + ftrace_trace_function(self_addr, parent, NULL, NULL); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_graph_return != (trace_func_graph_ret_t) ftrace_stub || @@ -75,3 +73,116 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, #endif } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +int ftrace_enable_ftrace_graph_caller(void) +{ + return 0; +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return 0; +} +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + return 0; +} + +unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr+(FTRACE_PATCHABLE_FUNCTION_SIZE-1)*4; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + u32 insn[FTRACE_PATCHABLE_FUNCTION_SIZE]; + u32 *tramp; + int size, ret, i; + void *ip; + +#ifdef CONFIG_64BIT + unsigned long addr2 = + (unsigned long)dereference_function_descriptor((void *)addr); + + u32 ftrace_trampoline[] = { + 0x73c10208, /* std,ma 
r1,100(sp) */ + 0x0c2110c1, /* ldd -10(r1),r1 */ + 0xe820d002, /* bve,n (r1) */ + addr2 >> 32, + addr2 & 0xffffffff, + 0xe83f1fd7, /* b,l,n .-14,r1 */ + }; + + u32 ftrace_trampoline_unaligned[] = { + addr2 >> 32, + addr2 & 0xffffffff, + 0x37de0200, /* ldo 100(sp),sp */ + 0x73c13e01, /* std r1,-100(sp) */ + 0x34213ff9, /* ldo -4(r1),r1 */ + 0x50213fc1, /* ldd -20(r1),r1 */ + 0xe820d002, /* bve,n (r1) */ + 0xe83f1fcf, /* b,l,n .-20,r1 */ + }; + + BUILD_BUG_ON(ARRAY_SIZE(ftrace_trampoline_unaligned) > + FTRACE_PATCHABLE_FUNCTION_SIZE); +#else + u32 ftrace_trampoline[] = { + (u32)addr, + 0x6fc10080, /* stw,ma r1,40(sp) */ + 0x48213fd1, /* ldw -18(r1),r1 */ + 0xe820c002, /* bv,n r0(r1) */ + 0xe83f1fdf, /* b,l,n .-c,r1 */ + }; +#endif + + BUILD_BUG_ON(ARRAY_SIZE(ftrace_trampoline) > + FTRACE_PATCHABLE_FUNCTION_SIZE); + + size = sizeof(ftrace_trampoline); + tramp = ftrace_trampoline; + +#ifdef CONFIG_64BIT + if (rec->ip & 0x4) { + size = sizeof(ftrace_trampoline_unaligned); + tramp = ftrace_trampoline_unaligned; + } +#endif + + ip = (void *)(rec->ip + 4 - size); + + ret = probe_kernel_read(insn, ip, size); + if (ret) + return ret; + + for (i = 0; i < size / 4; i++) { + if (insn[i] != INSN_NOP) + return -EINVAL; + } + + __patch_text_multiple(ip, tramp, size); + return 0; +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + u32 insn[FTRACE_PATCHABLE_FUNCTION_SIZE]; + int i; + + for (i = 0; i < ARRAY_SIZE(insn); i++) + insn[i] = INSN_NOP; + + __patch_text_multiple((void *)rec->ip + 4 - sizeof(insn), + insn, sizeof(insn)); + return 0; +} +#endif diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index f241ded9239b..ac5f34993b53 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -33,9 +33,9 @@ * However, SEGREL32 is used only for PARISC unwind entries, and we want * those entries to have an absolute address, and not just an offset. * - * The unwind table mechanism has the ability to specify an offset for + * The unwind table mechanism has the ability to specify an offset for * the unwind table; however, because we split off the init functions into - * a different piece of memory, it is not possible to do this using a + * a different piece of memory, it is not possible to do this using a * single offset. Instead, we use the above hack for now. */ @@ -53,12 +53,6 @@ #include <asm/unwind.h> #include <asm/sections.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt...) -#endif - #define RELOC_REACHABLE(val, bits) \ (( ( !((val) & (1<<((bits)-1))) && ((val)>>(bits)) != 0 ) || \ ( ((val) & (1<<((bits)-1))) && ((val)>>(bits)) != (((__typeof__(val))(~0))>>((bits)+2)))) ? 
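
The ftrace_make_call()/ftrace_make_nop() implementations above rely on every traced function starting with FTRACE_PATCHABLE_FUNCTION_SIZE NOP words, and only install the trampoline after checking that those words are still NOPs. The sketch below is a minimal, self-contained userspace illustration of that verify-then-patch pattern on an ordinary buffer; the NOP encoding, the slot count and the helper name patch_entry() are illustrative stand-ins rather than the kernel's definitions, and the real code writes through __patch_text_multiple() instead of memcpy() because kernel text may be mapped read-only.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NOP_SLOTS 6		/* stand-in for FTRACE_PATCHABLE_FUNCTION_SIZE */
#define INSN_NOP  0x08000240u	/* illustrative NOP word, not necessarily parisc's */

/* Refuse to patch unless every slot still holds a NOP, then copy the
 * trampoline words in; this mirrors the check in ftrace_make_call() above. */
static int patch_entry(uint32_t *site, const uint32_t *tramp, size_t words)
{
	size_t i;

	if (words > NOP_SLOTS)
		return -1;
	for (i = 0; i < words; i++)
		if (site[i] != INSN_NOP)
			return -1;	/* site was already patched or corrupted */
	memcpy(site, tramp, words * sizeof(*site));
	return 0;
}

int main(void)
{
	uint32_t site[NOP_SLOTS] = {
		INSN_NOP, INSN_NOP, INSN_NOP, INSN_NOP, INSN_NOP, INSN_NOP,
	};
	uint64_t target = 0x123456789abcULL;
	/* 64-bit targets are stored as two 32-bit words, just as in the
	 * ftrace_trampoline[] arrays above. */
	uint32_t tramp[2] = { (uint32_t)(target >> 32), (uint32_t)target };

	printf("first patch:  %d\n", patch_entry(site, tramp, 2));	/* 0 */
	printf("second patch: %d\n", patch_entry(site, tramp, 2));	/* -1 */
	return 0;
}

In the kernel itself the copy has to go through patch_map()/__patch_text_multiple(), since with STRICT_KERNEL_RWX or STRICT_MODULE_RWX the patch site is not directly writable.
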
\ @@ -300,7 +294,7 @@ unsigned int arch_mod_section_prepend(struct module *mod, * sizeof(struct stub_entry); } -#define CONST +#define CONST int module_frob_arch_sections(CONST Elf_Ehdr *hdr, CONST Elf_Shdr *sechdrs, CONST char *secstrings, @@ -386,7 +380,7 @@ static Elf64_Word get_got(struct module *me, unsigned long value, long addend) got[i].addr = value; out: - DEBUGP("GOT ENTRY %d[%x] val %lx\n", i, i*sizeof(struct got_entry), + pr_debug("GOT ENTRY %d[%lx] val %lx\n", i, i*sizeof(struct got_entry), value); return i * sizeof(struct got_entry); } @@ -539,7 +533,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, //unsigned long dp = (unsigned long)$global$; register unsigned long dp asm ("r27"); - DEBUGP("Applying relocate section %u to %u\n", relsec, + pr_debug("Applying relocate section %u to %u\n", relsec, targetsec); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ @@ -563,7 +557,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, #if 0 #define r(t) ELF32_R_TYPE(rel[i].r_info)==t ? #t : - DEBUGP("Symbol %s loc 0x%x val 0x%x addend 0x%x: %s\n", + pr_debug("Symbol %s loc 0x%x val 0x%x addend 0x%x: %s\n", strtab + sym->st_name, (uint32_t)loc, val, addend, r(R_PARISC_PLABEL32) @@ -604,7 +598,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* See note about special handling of SEGREL32 at * the beginning of this file. */ - *loc = fsel(val, addend); + *loc = fsel(val, addend); break; case R_PARISC_SECREL32: /* 32-bit section relative address. */ @@ -683,7 +677,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, Elf_Addr loc0; unsigned int targetsec = sechdrs[relsec].sh_info; - DEBUGP("Applying relocate section %u to %u\n", relsec, + pr_debug("Applying relocate section %u to %u\n", relsec, targetsec); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ @@ -725,7 +719,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, case R_PARISC_LTOFF21L: /* LT-relative; left 21 bits */ val = get_got(me, val, addend); - DEBUGP("LTOFF21L Symbol %s loc %p val %lx\n", + pr_debug("LTOFF21L Symbol %s loc %p val %llx\n", strtab + sym->st_name, loc, val); val = lrsel(val, 0); @@ -736,14 +730,14 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* LT-relative; right 14 bits */ val = get_got(me, val, addend); val = rrsel(val, 0); - DEBUGP("LTOFF14R Symbol %s loc %p val %lx\n", + pr_debug("LTOFF14R Symbol %s loc %p val %llx\n", strtab + sym->st_name, loc, val); *loc = mask(*loc, 14) | reassemble_14(val); break; case R_PARISC_PCREL22F: /* PC-relative; 22 bits */ - DEBUGP("PCREL22F Symbol %s loc %p val %lx\n", + pr_debug("PCREL22F Symbol %s loc %p val %llx\n", strtab + sym->st_name, loc, val); val += addend; @@ -775,7 +769,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, val = get_stub(me, val, addend, ELF_STUB_GOT, loc0, targetsec); } - DEBUGP("STUB FOR %s loc %lx, val %lx+%lx at %lx\n", + pr_debug("STUB FOR %s loc %px, val %llx+%llx at %llx\n", strtab + sym->st_name, loc, sym->st_value, addend, val); val = (val - dot - 8)/4; @@ -786,6 +780,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* 32-bit PC relative address */ *loc = val - dot - 8 + addend; break; + case R_PARISC_PCREL64: + /* 64-bit PC relative address */ + *loc64 = val - dot - 8 + addend; + break; case R_PARISC_DIR64: /* 64-bit effective address */ *loc64 = val + addend; @@ -795,7 +793,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* See note about special handling of SEGREL32 at * the beginning of this file. 
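
The PCREL32 case above and the newly added PCREL64 case both reduce to val - dot - 8 + addend, where dot is the address of the word being fixed up; the extra -8 matches PA-RISC displacements being measured from the current instruction plus 8 (the same -8 appears in the PCREL22F handling). A tiny standalone sketch of that arithmetic with made-up addresses, purely to make the formula concrete:

#include <stdint.h>
#include <stdio.h>

/* PC-relative displacement as computed by the PCREL32/PCREL64 cases above. */
static int64_t pcrel_displacement(uint64_t val, uint64_t dot, int64_t addend)
{
	return (int64_t)(val - dot) - 8 + addend;
}

int main(void)
{
	uint64_t dot = 0x10000000;	/* illustrative: address being relocated */
	uint64_t val = 0x10000400;	/* illustrative: target symbol value */

	/* prints 1016, i.e. 0x400 - 8 */
	printf("%lld\n", (long long)pcrel_displacement(val, dot, 0));
	return 0;
}
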
*/ - *loc = fsel(val, addend); + *loc = fsel(val, addend); break; case R_PARISC_SECREL32: /* 32-bit section relative address. */ @@ -805,14 +803,14 @@ int apply_relocate_add(Elf_Shdr *sechdrs, /* 64-bit function address */ if(in_local(me, (void *)(val + addend))) { *loc64 = get_fdesc(me, val+addend); - DEBUGP("FDESC for %s at %p points to %lx\n", + pr_debug("FDESC for %s at %llx points to %llx\n", strtab + sym->st_name, *loc64, ((Elf_Fdesc *)*loc64)->addr); } else { /* if the symbol is not local to this * module then val+addend is a pointer * to the function descriptor */ - DEBUGP("Non local FPTR64 Symbol %s loc %p val %lx\n", + pr_debug("Non local FPTR64 Symbol %s loc %p val %llx\n", strtab + sym->st_name, loc, val); *loc64 = val + addend; @@ -843,7 +841,7 @@ register_unwind_table(struct module *me, end = table + sechdrs[me->arch.unwind_section].sh_size; gp = (Elf_Addr)me->core_layout.base + me->arch.got_offset; - DEBUGP("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", + pr_debug("register_unwind_table(), sect = %d at 0x%p - 0x%p (gp=0x%lx)\n", me->arch.unwind_section, table, end, gp); me->arch.unwind = unwind_table_add(me->name, 0, gp, table, end); } @@ -864,6 +862,7 @@ int module_finalize(const Elf_Ehdr *hdr, const char *strtab = NULL; const Elf_Shdr *s; char *secstrings; + int err, symindex = -1; Elf_Sym *newptr, *oldptr; Elf_Shdr *symhdr = NULL; #ifdef DEBUG @@ -890,6 +889,7 @@ int module_finalize(const Elf_Ehdr *hdr, if(sechdrs[i].sh_type == SHT_SYMTAB && (sechdrs[i].sh_flags & SHF_ALLOC)) { int strindex = sechdrs[i].sh_link; + symindex = i; /* FIXME: AWFUL HACK * The cast is to drop the const from * the sechdrs pointer */ @@ -899,7 +899,7 @@ int module_finalize(const Elf_Ehdr *hdr, } } - DEBUGP("module %s: strtab %p, symhdr %p\n", + pr_debug("module %s: strtab %p, symhdr %p\n", me->name, strtab, symhdr); if(me->arch.got_count > MAX_GOTS) { @@ -918,7 +918,7 @@ int module_finalize(const Elf_Ehdr *hdr, oldptr = (void *)symhdr->sh_addr; newptr = oldptr + 1; /* we start counting at 1 */ nsyms = symhdr->sh_size / sizeof(Elf_Sym); - DEBUGP("OLD num_symtab %lu\n", nsyms); + pr_debug("OLD num_symtab %lu\n", nsyms); for (i = 1; i < nsyms; i++) { oldptr++; /* note, count starts at 1 so preincrement */ @@ -933,7 +933,7 @@ int module_finalize(const Elf_Ehdr *hdr, } nsyms = newptr - (Elf_Sym *)symhdr->sh_addr; - DEBUGP("NEW num_symtab %lu\n", nsyms); + pr_debug("NEW num_symtab %lu\n", nsyms); symhdr->sh_size = nsyms * sizeof(Elf_Sym); /* find .altinstructions section */ @@ -945,8 +945,24 @@ int module_finalize(const Elf_Ehdr *hdr, if (!strcmp(".altinstructions", secname)) /* patch .altinstructions */ apply_alternatives(aseg, aseg + s->sh_size, me->name); - } + /* For 32 bit kernels we're compiling modules with + * -ffunction-sections so we must relocate the addresses in the + *__mcount_loc section. 
+ */ + if (symindex != -1 && !strcmp(secname, "__mcount_loc")) { + if (s->sh_type == SHT_REL) + err = apply_relocate((Elf_Shdr *)sechdrs, + strtab, symindex, + s - sechdrs, me); + else if (s->sh_type == SHT_RELA) + err = apply_relocate_add((Elf_Shdr *)sechdrs, + strtab, symindex, + s - sechdrs, me); + if (err) + return err; + } + } return 0; } diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds new file mode 100644 index 000000000000..1a9a92aca5c8 --- /dev/null +++ b/arch/parisc/kernel/module.lds @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +SECTIONS { + __mcount_loc : { + *(__patchable_function_entries) + } +} diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c index cdcd981278b3..80a0ab372802 100644 --- a/arch/parisc/kernel/patch.c +++ b/arch/parisc/kernel/patch.c @@ -17,15 +17,20 @@ struct patch { void *addr; - unsigned int insn; + u32 *insn; + unsigned int len; }; -static void __kprobes *patch_map(void *addr, int fixmap) +static DEFINE_RAW_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags, + int *need_unmap) { unsigned long uintaddr = (uintptr_t) addr; bool module = !core_kernel_text(uintaddr); struct page *page; + *need_unmap = 0; if (module && IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) page = vmalloc_to_page(addr); else if (!module && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) @@ -33,36 +38,74 @@ static void __kprobes *patch_map(void *addr, int fixmap) else return addr; + *need_unmap = 1; set_fixmap(fixmap, page_to_phys(page)); + if (flags) + raw_spin_lock_irqsave(&patch_lock, *flags); + else + __acquire(&patch_lock); return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); } -static void __kprobes patch_unmap(int fixmap) +static void __kprobes patch_unmap(int fixmap, unsigned long *flags) { clear_fixmap(fixmap); + + if (flags) + raw_spin_unlock_irqrestore(&patch_lock, *flags); + else + __release(&patch_lock); +} + +void __kprobes __patch_text_multiple(void *addr, u32 *insn, unsigned int len) +{ + unsigned long start = (unsigned long)addr; + unsigned long end = (unsigned long)addr + len; + unsigned long flags; + u32 *p, *fixmap; + int mapped; + + /* Make sure we don't have any aliases in cache */ + flush_kernel_vmap_range(addr, len); + flush_icache_range(start, end); + + p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, &mapped); + + while (len >= 4) { + *p++ = *insn++; + addr += sizeof(u32); + len -= sizeof(u32); + if (len && offset_in_page(addr) == 0) { + /* + * We're crossing a page boundary, so + * need to remap + */ + flush_kernel_vmap_range((void *)fixmap, + (p-fixmap) * sizeof(*p)); + if (mapped) + patch_unmap(FIX_TEXT_POKE0, &flags); + p = fixmap = patch_map(addr, FIX_TEXT_POKE0, &flags, + &mapped); + } + } + + flush_kernel_vmap_range((void *)fixmap, (p-fixmap) * sizeof(*p)); + if (mapped) + patch_unmap(FIX_TEXT_POKE0, &flags); + flush_icache_range(start, end); } -void __kprobes __patch_text(void *addr, unsigned int insn) +void __kprobes __patch_text(void *addr, u32 insn) { - void *waddr = addr; - int size; - - waddr = patch_map(addr, FIX_TEXT_POKE0); - *(u32 *)waddr = insn; - size = sizeof(u32); - flush_kernel_vmap_range(waddr, size); - patch_unmap(FIX_TEXT_POKE0); - flush_icache_range((uintptr_t)(addr), - (uintptr_t)(addr) + size); + __patch_text_multiple(addr, &insn, sizeof(insn)); } static int __kprobes patch_text_stop_machine(void *data) { struct patch *patch = data; - __patch_text(patch->addr, patch->insn); - + __patch_text_multiple(patch->addr, patch->insn, 
patch->len); return 0; } @@ -70,7 +113,20 @@ void __kprobes patch_text(void *addr, unsigned int insn) { struct patch patch = { .addr = addr, + .insn = &insn, + .len = sizeof(insn), + }; + + stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL); +} + +void __kprobes patch_text_multiple(void *addr, u32 *insn, unsigned int len) +{ + + struct patch patch = { + .addr = addr, .insn = insn, + .len = len }; stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL); diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 239162355b58..ca35d9a76e50 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -394,17 +394,20 @@ pcxl_dma_init(void) __initcall(pcxl_dma_init); -static void *pcxl_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { unsigned long vaddr; unsigned long paddr; int order; + if (boot_cpu_data.cpu_type != pcxl2 && boot_cpu_data.cpu_type != pcxl) + return NULL; + order = get_order(size); size = 1 << (order + PAGE_SHIFT); vaddr = pcxl_alloc_range(size); - paddr = __get_free_pages(flag | __GFP_ZERO, order); + paddr = __get_free_pages(gfp | __GFP_ZERO, order); flush_kernel_dcache_range(paddr, size); paddr = __pa(paddr); map_uncached_pages(vaddr, size, paddr); @@ -421,44 +424,19 @@ static void *pcxl_dma_alloc(struct device *dev, size_t size, return (void *)vaddr; } -static void *pcx_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) -{ - void *addr; - - if ((attrs & DMA_ATTR_NON_CONSISTENT) == 0) - return NULL; - - addr = (void *)__get_free_pages(flag | __GFP_ZERO, get_order(size)); - if (addr) - *dma_handle = (dma_addr_t)virt_to_phys(addr); - - return addr; -} - -void *arch_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) -{ - - if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) - return pcxl_dma_alloc(dev, size, dma_handle, gfp, attrs); - else - return pcx_dma_alloc(dev, size, dma_handle, gfp, attrs); -} - void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { int order = get_order(size); - if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) { - size = 1 << (order + PAGE_SHIFT); - unmap_uncached_pages((unsigned long)vaddr, size); - pcxl_free_range((unsigned long)vaddr, size); + WARN_ON_ONCE(boot_cpu_data.cpu_type != pcxl2 && + boot_cpu_data.cpu_type != pcxl); - vaddr = __va(dma_handle); - } - free_pages((unsigned long)vaddr, get_order(size)); + size = 1 << (order + PAGE_SHIFT); + unmap_uncached_pages((unsigned long)vaddr, size); + pcxl_free_range((unsigned long)vaddr, size); + + free_pages((unsigned long)__va(dma_handle), order); } void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index a3d2fb4e6dd2..f642ba378ffa 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -88,9 +88,9 @@ void user_enable_single_step(struct task_struct *task) ptrace_disable(task); /* Don't wake up the task, but let the parent know something happened. 
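
__patch_text_multiple() above copies one 32-bit word at a time through a temporary fixmap alias and drops and re-establishes that alias whenever the destination crosses a page boundary (offset_in_page(addr) == 0). Below is a small userspace illustration of just that chunk-and-remap loop; map_window()/unmap_window() are invented stand-ins for patch_map()/patch_unmap() that only count mappings, and the static buffer stands in for kernel text.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096UL
#define offset_in_page(p) ((uintptr_t)(p) & (PAGE_SZ - 1))

static int windows;

/* Invented stand-ins for patch_map()/patch_unmap(). */
static uint32_t *map_window(uint32_t *addr) { windows++; return addr; }
static void unmap_window(void) { }

/* Same shape as the copy loop in __patch_text_multiple(): write word by
 * word and re-map whenever the destination moves onto a new page. */
static void copy_words(uint32_t *addr, const uint32_t *insn, unsigned int len)
{
	uint32_t *p = map_window(addr);

	while (len >= 4) {
		*p++ = *insn++;
		addr++;
		len -= sizeof(uint32_t);
		if (len && offset_in_page(addr) == 0) {
			unmap_window();
			p = map_window(addr);
		}
	}
	unmap_window();
}

int main(void)
{
	static uint32_t text[2 * PAGE_SZ / sizeof(uint32_t)]
				__attribute__((aligned(4096)));
	uint32_t insn[16];
	unsigned int i;

	for (i = 0; i < 16; i++)
		insn[i] = 0x08000240;	/* arbitrary filler word */

	/* start two words before a page boundary so the copy crosses it */
	copy_words(&text[PAGE_SZ / sizeof(uint32_t) - 2], insn, sizeof(insn));
	printf("mapping established %d times\n", windows);	/* 2 */
	return 0;
}
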
*/ - force_sig_fault(SIGTRAP, TRAP_TRACE, - (void __user *) (task_regs(task)->iaoq[0] & ~3), - task); + force_sig_fault_to_task(SIGTRAP, TRAP_TRACE, + (void __user *) (task_regs(task)->iaoq[0] & ~3), + task); /* notify_parent(task, SIGCHLD); */ return; } diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 848c1934680b..02895a8f2c55 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -164,7 +164,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) give_sigsegv: DBG(1,"sys_rt_sigreturn: Sending SIGSEGV\n"); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return; } diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index c9e377d59232..5022b9e179c2 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -430,3 +430,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 096e319adeb3..58dcf445e32f 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -275,7 +275,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) static void handle_gdb_break(struct pt_regs *regs, int wot) { force_sig_fault(SIGTRAP, wot, - (void __user *) (regs->iaoq[0] & ~3), current); + (void __user *) (regs->iaoq[0] & ~3)); } static void handle_break(struct pt_regs *regs) @@ -609,13 +609,13 @@ void notrace handle_interruption(int code, struct pt_regs *regs) si_code = ILL_PRVREG; give_sigill: force_sig_fault(SIGILL, si_code, - (void __user *) regs->iaoq[0], current); + (void __user *) regs->iaoq[0]); return; case 12: /* Overflow Trap, let the userland signal handler do the cleanup */ force_sig_fault(SIGFPE, FPE_INTOVF, - (void __user *) regs->iaoq[0], current); + (void __user *) regs->iaoq[0]); return; case 13: @@ -627,7 +627,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) * to by si_addr. */ force_sig_fault(SIGFPE, FPE_CONDTRAP, - (void __user *) regs->iaoq[0], current); + (void __user *) regs->iaoq[0]); return; } /* The kernel doesn't want to handle condition codes */ @@ -739,7 +739,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) force_sig_fault(SIGSEGV, SEGV_MAPERR, (code == 7)? ((void __user *) regs->iaoq[0]) : - ((void __user *) regs->ior), current); + ((void __user *) regs->ior)); return; case 28: @@ -754,7 +754,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) task_pid_nr(current), current->comm); /* SIGBUS, for lack of a better one. */ force_sig_fault(SIGBUS, BUS_OBJERR, - (void __user *)regs->ior, current); + (void __user *)regs->ior); return; } pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); @@ -770,7 +770,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) code, fault_space, task_pid_nr(current), current->comm); force_sig_fault(SIGSEGV, SEGV_MAPERR, - (void __user *)regs->ior, current); + (void __user *)regs->ior); return; } } diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 30161b7c9ac2..237d20dd5622 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -676,14 +676,14 @@ void handle_unaligned(struct pt_regs *regs) if (ret == ERR_PAGEFAULT) { force_sig_fault(SIGSEGV, SEGV_MAPERR, - (void __user *)regs->ior, current); + (void __user *)regs->ior); } else { force_sigbus: /* couldn't handle it ... 
*/ force_sig_fault(SIGBUS, BUS_ADRALN, - (void __user *)regs->ior, current); + (void __user *)regs->ior); } return; diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index cd33b4feacb1..99cd24f2ea01 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -18,6 +18,8 @@ *(.data..vm0.pgd) \ *(.data..vm0.pte) +#define CC_USING_PATCHABLE_FUNCTION_ENTRY + #include <asm-generic/vmlinux.lds.h> /* needed for the processor specific cache alignment size */ diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index c83237c0cbc1..6ce427b58836 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -104,7 +104,7 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { force_sig_fault(signalcode >> 24, signalcode & 0xffffff, - (void __user *) regs->iaoq[0], current); + (void __user *) regs->iaoq[0]); return -1; } diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index c8e8b7c05558..6dd4669ce7a5 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -403,13 +403,13 @@ bad_area: lsb = PAGE_SHIFT; force_sig_mceerr(BUS_MCEERR_AR, (void __user *) address, - lsb, current); + lsb); return; } #endif show_signal_msg(regs, code, address, tsk, vma); - force_sig_fault(signo, si_code, (void __user *) address, current); + force_sig_fault(signo, si_code, (void __user *) address); return; } diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c index c8d41b54fb19..474cd241c150 100644 --- a/arch/parisc/mm/fixmap.c +++ b/arch/parisc/mm/fixmap.c @@ -10,7 +10,7 @@ #include <asm/cacheflush.h> #include <asm/fixmap.h> -void set_fixmap(enum fixed_addresses idx, phys_addr_t phys) +void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys) { unsigned long vaddr = __fix_to_virt(idx); pgd_t *pgd = pgd_offset_k(vaddr); @@ -28,13 +28,16 @@ void set_fixmap(enum fixed_addresses idx, phys_addr_t phys) flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); } -void clear_fixmap(enum fixed_addresses idx) +void notrace clear_fixmap(enum fixed_addresses idx) { unsigned long vaddr = __fix_to_virt(idx); pgd_t *pgd = pgd_offset_k(vaddr); pmd_t *pmd = pmd_offset(pgd, vaddr); pte_t *pte = pte_offset_kernel(pmd, vaddr); + if (WARN_ON(pte_none(*pte))) + return; + pte_clear(&init_mm, vaddr, pte); flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8c1c636308c8..f516796dd819 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -48,7 +48,7 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX # Allow randomisation to consume up to 512MB of address space (2^29). 
default 11 if PPC_256K_PAGES # 11 = 29 (512MB) - 18 (256K) default 13 if PPC_64K_PAGES # 13 = 29 (512MB) - 16 (64K) - default 15 if PPC_16K_PAGES # 15 = 29 (512MB) - 14 (16K) + default 15 if PPC_16K_PAGES # 15 = 29 (512MB) - 14 (16K) default 17 # 17 = 29 (512MB) - 12 (4K) config ARCH_MMAP_RND_COMPAT_BITS_MIN @@ -125,6 +125,7 @@ config PPC select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV + select ARCH_HAS_HUGEPD if HUGETLB_PAGE select ARCH_HAS_MMIOWB if PPC64 select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_PMEM_API if PPC64 @@ -167,6 +168,7 @@ config PPC select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if PPC32 select HAVE_ARCH_KGDB @@ -175,6 +177,7 @@ config PPC select HAVE_ARCH_NVRAM_OPS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK + select HAVE_C_RECORDMCOUNT select HAVE_CBPF_JIT if !PPC64 select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) @@ -185,17 +188,19 @@ config PPC select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL select HAVE_EBPF_JIT if PPC64 select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC - select HAVE_GENERIC_GUP select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE + select HAVE_KERNEL_LZO if DEFAULT_UIMAGE select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE @@ -234,6 +239,7 @@ config PPC select OLD_SIGSUSPEND select PCI_DOMAINS if PCI select PCI_SYSCALL if PCI + select PPC_DAWR if PPC64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE @@ -244,9 +250,9 @@ config PPC # config PPC_BARRIER_NOSPEC - bool - default y - depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E + bool + default y + depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E config EARLY_PRINTK bool @@ -370,6 +376,9 @@ config PPC_ADV_DEBUG_DAC_RANGE depends on PPC_ADV_DEBUG_REGS && 44x default y +config PPC_DAWR + bool + config ZONE_DMA bool default y if PPC_BOOK3E_64 @@ -398,7 +407,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE config MATH_EMULATION bool "Math emulation" depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE - ---help--- + help Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the floating-point instructions in the PowerPC instruction set. If you @@ -417,27 +426,27 @@ choice config MATH_EMULATION_FULL bool "Emulate all the floating point instructions" - ---help--- + help Select this option will enable the kernel to support to emulate all the floating point instructions. If your SoC doesn't have a FPU, you should select this. config MATH_EMULATION_HW_UNIMPLEMENTED bool "Just emulate the FPU unimplemented instructions" - ---help--- + help Select this if you know there does have a hardware FPU on your SoC, but some floating point instructions are not implemented by that. 
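
The ARCH_MMAP_RND_COMPAT_BITS_MAX defaults near the top of this Kconfig hunk follow directly from the arithmetic in their comments: random bits = log2(512 MB window) - log2(page size). A throwaway check that just restates those comments (values and page sizes are taken from the hunk itself):

#include <stdio.h>

int main(void)
{
	/* 512 MB window = 2^29; page-size shifts as in the Kconfig comments */
	const struct { const char *page; int shift; } sizes[] = {
		{ "256K", 18 }, { "64K", 16 }, { "16K", 14 }, { "4K", 12 },
	};
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%-4s pages -> %d random bits\n",
		       sizes[i].page, 29 - sizes[i].shift);
	return 0;	/* prints 11, 13, 15 and 17, matching the defaults */
}
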
endchoice config PPC_TRANSACTIONAL_MEM - bool "Transactional Memory support for POWERPC" - depends on PPC_BOOK3S_64 - depends on SMP - select ALTIVEC - select VSX - ---help--- - Support user-mode Transactional Memory on POWERPC. + bool "Transactional Memory support for POWERPC" + depends on PPC_BOOK3S_64 + depends on SMP + select ALTIVEC + select VSX + help + Support user-mode Transactional Memory on POWERPC. config LD_HEAD_STUB_CATCH bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT @@ -457,7 +466,7 @@ config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE) - ---help--- + help Say Y here to be able to disable and re-enable individual CPUs at runtime on SMP machines. @@ -825,7 +834,7 @@ config PPC_DENORMALISATION bool "PowerPC denormalisation exception handling" depends on PPC_BOOK3S_64 default "y" if PPC_POWERNV - ---help--- + help Add support for handling denormalisation of single precision values. Useful for bare metal only. If unsure say Y here. @@ -898,7 +907,7 @@ config PPC_MEM_KEYS page-based protections, but without requiring modification of the page tables when an application changes protection domains. - For details, see Documentation/vm/protection-keys.rst + For details, see Documentation/core-api/protection-keys.rst If unsure, say y. @@ -938,7 +947,7 @@ config FSL_SOC bool config FSL_PCI - bool + bool select ARCH_HAS_DMA_SET_MASK select PPC_INDIRECT_PCI select PCI_QUIRKS @@ -986,7 +995,7 @@ config FSL_RIO bool "Freescale Embedded SRIO Controller support" depends on RAPIDIO = y && HAVE_RAPIDIO default "n" - ---help--- + help Include support for RapidIO controller on Freescale embedded processors (MPC8548, MPC8641, etc). @@ -1050,14 +1059,14 @@ config DYNAMIC_MEMSTART select NONSTATIC_KERNEL help This option enables the kernel to be loaded at any page aligned - physical address. The kernel creates a mapping from KERNELBASE to + physical address. The kernel creates a mapping from KERNELBASE to the address where the kernel is loaded. The page size here implies the TLB page size of the mapping for kernel on the particular platform. Please refer to the init code for finding the TLB page size. DYNAMIC_MEMSTART is an easy way of implementing pseudo-RELOCATABLE kernel image, where the only restriction is the page aligned kernel - load address. When this option is enabled, the compile time physical + load address. When this option is enabled, the compile time physical address CONFIG_PHYSICAL_START is ignored. 
This option is overridden by CONFIG_RELOCATABLE diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index 32034a0cc554..6610665fcf5e 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -44,5 +44,3 @@ fdt_sw.c fdt_wip.c libfdt.h libfdt_internal.h -autoconf.h - diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 73d1f3562978..6841bd52738b 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -20,9 +20,6 @@ all: $(obj)/zImage -compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP -compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ - ifdef CROSS32_COMPILE BOOTCC := $(CROSS32_COMPILE)gcc BOOTAR := $(CROSS32_COMPILE)ar @@ -34,7 +31,7 @@ endif BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \ -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ - -D$(compress-y) + $(LINUXINCLUDE) ifdef CONFIG_PPC64_BOOT_WRAPPER BOOTCFLAGS += -m64 @@ -51,7 +48,7 @@ BOOTCFLAGS += -mlittle-endian BOOTCFLAGS += $(call cc-option,-mabi=elfv2) endif -BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc +BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -nostdinc BOOTARFLAGS := -cr$(KBUILD_ARFLAGS) @@ -202,14 +199,9 @@ $(obj)/empty.c: $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S $(Q)cp $< $@ -$(srctree)/$(src)/serial.c: $(obj)/autoconf.h - -$(obj)/autoconf.h: $(obj)/%: $(objtree)/include/generated/% - $(Q)cp $< $@ - clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \ $(zlib-decomp-) $(libfdt) $(libfdtheader) \ - autoconf.h empty.c zImage.coff.lds zImage.ps3.lds zImage.lds + empty.c zImage.coff.lds zImage.ps3.lds zImage.lds quiet_cmd_bootcc = BOOTCC $@ cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $< @@ -257,6 +249,8 @@ endif compressor-$(CONFIG_KERNEL_GZIP) := gz compressor-$(CONFIG_KERNEL_XZ) := xz +compressor-$(CONFIG_KERNEL_LZMA) := lzma +compressor-$(CONFIG_KERNEL_LZO) := lzo # args (to if_changed): 1 = (this rule), 2 = platform, 3 = dts 4=dtb 5=initrd quiet_cmd_wrap = WRAP $@ diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c index b0491b8c0199..9457863147f9 100644 --- a/arch/powerpc/boot/serial.c +++ b/arch/powerpc/boot/serial.c @@ -18,7 +18,6 @@ #include "stdio.h" #include "io.h" #include "ops.h" -#include "autoconf.h" static int serial_open(void) { diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 532d45833396..5148ac271f28 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -40,6 +40,7 @@ dts= cacheit= binary= compression=.gz +uboot_comp=gzip pie= format= @@ -130,22 +131,29 @@ while [ "$#" -gt 0 ]; do ;; -z) compression=.gz + uboot_comp=gzip ;; -Z) shift [ "$#" -gt 0 ] || usage - [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "none" ] || usage + [ "$1" != "gz" -o "$1" != "xz" -o "$1" != "lzma" -o "$1" != "lzo" -o "$1" != "none" ] || usage compression=".$1" + uboot_comp=$1 if [ $compression = ".none" ]; then compression= + uboot_comp=none fi + if [ $uboot_comp = "gz" ]; then + uboot_comp=gzip + fi ;; --no-gzip) # a "feature" of the the wrapper script is that it can be used outside # the kernel tree. So keeping this around for backwards compatibility. compression= + uboot_comp=none ;; -?) usage @@ -365,9 +373,16 @@ if [ -z "$cacheit" -o ! 
-f "$vmz$compression" -o "$vmz$compression" -ot "$kernel .gz) gzip -n -f -9 "$vmz.$$" ;; + .lzma) + xz --format=lzma -f -6 "$vmz.$$" + ;; + .lzo) + lzop -f -9 "$vmz.$$" + ;; *) # drop the compression suffix so the stripped vmlinux is used compression= + uboot_comp=none ;; esac @@ -411,7 +426,7 @@ membase=`${CROSS}objdump -p "$kernel" | grep -m 1 LOAD | awk '{print $7}'` case "$platform" in uboot) rm -f "$ofile" - ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a $membase -e $membase \ + ${MKIMAGE} -A ppc -O linux -T kernel -C $uboot_comp -a $membase -e $membase \ $uboot_version -d "$vmz" "$ofile" if [ -z "$cacheit" ]; then rm -f "$vmz" diff --git a/arch/powerpc/boot/xz_config.h b/arch/powerpc/boot/xz_config.h index e22e5b3770dd..ebfadd39e192 100644 --- a/arch/powerpc/boot/xz_config.h +++ b/arch/powerpc/boot/xz_config.h @@ -20,10 +20,30 @@ static inline uint32_t swab32p(void *p) #ifdef __LITTLE_ENDIAN__ #define get_le32(p) (*((uint32_t *) (p))) +#define cpu_to_be32(x) swab32(x) +static inline u32 be32_to_cpup(const u32 *p) +{ + return swab32p((u32 *)p); +} #else #define get_le32(p) swab32p(p) +#define cpu_to_be32(x) (x) +static inline u32 be32_to_cpup(const u32 *p) +{ + return *p; +} #endif +static inline uint32_t get_unaligned_be32(const void *p) +{ + return be32_to_cpup(p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + *((u32 *)p) = cpu_to_be32(val); +} + #define memeq(a, b, size) (memcmp(a, b, size) == 0) #define memzero(buf, size) memset(buf, 0, size) diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig index e57344c3b0d7..5a75e4f14273 100644 --- a/arch/powerpc/configs/40x/acadia_defconfig +++ b/arch/powerpc/configs/40x/acadia_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig index 0f66f8a87be8..e2691c5db766 100644 --- a/arch/powerpc/configs/40x/ep405_defconfig +++ b/arch/powerpc/configs/40x/ep405_defconfig @@ -21,7 +21,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig index 3da091f651d6..949989ef2322 100644 --- a/arch/powerpc/configs/40x/kilauea_defconfig +++ b/arch/powerpc/configs/40x/kilauea_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig index caab658d1da1..4347a87088dc 100644 --- a/arch/powerpc/configs/40x/klondike_defconfig +++ b/arch/powerpc/configs/40x/klondike_defconfig @@ -14,7 +14,6 @@ CONFIG_APM8018X=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_MATH_EMULATION=y # CONFIG_SUSPEND is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_SCSI=y diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig index 
e0b1489b7c7b..90b759bbf426 100644 --- a/arch/powerpc/configs/40x/makalu_defconfig +++ b/arch/powerpc/configs/40x/makalu_defconfig @@ -21,7 +21,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig index 38d3d7769a2f..881c300c011d 100644 --- a/arch/powerpc/configs/40x/obs600_defconfig +++ b/arch/powerpc/configs/40x/obs600_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/40x/virtex_defconfig b/arch/powerpc/configs/40x/virtex_defconfig index a2b2770eee8f..5e7c61d1d7d0 100644 --- a/arch/powerpc/configs/40x/virtex_defconfig +++ b/arch/powerpc/configs/40x/virtex_defconfig @@ -31,7 +31,6 @@ CONFIG_NETFILTER=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_MANGLE=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig index 6faa03cd661c..0ed46704b9fa 100644 --- a/arch/powerpc/configs/40x/walnut_defconfig +++ b/arch/powerpc/configs/40x/walnut_defconfig @@ -19,7 +19,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 9fcd361607e2..2fa553ebfdc9 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -33,7 +33,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig index 6bba1a55b827..5a1b9ee18075 100644 --- a/arch/powerpc/configs/44x/arches_defconfig +++ b/arch/powerpc/configs/44x/arches_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig index 6f3a6ecc81e7..22e1ef5272ab 100644 --- a/arch/powerpc/configs/44x/bamboo_defconfig +++ b/arch/powerpc/configs/44x/bamboo_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig index 6b77aea79b6c..8006a5728afd 100644 --- a/arch/powerpc/configs/44x/bluestone_defconfig +++ b/arch/powerpc/configs/44x/bluestone_defconfig @@ -20,7 +20,6 @@ CONFIG_INET=y 
CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig index d427cee027a6..86f34ea4173a 100644 --- a/arch/powerpc/configs/44x/canyonlands_defconfig +++ b/arch/powerpc/configs/44x/canyonlands_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index 5f1df5fe4453..ce3ec5a2cd15 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -31,7 +31,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig index e2b6578993d5..f67447c92e6f 100644 --- a/arch/powerpc/configs/44x/ebony_defconfig +++ b/arch/powerpc/configs/44x/ebony_defconfig @@ -20,7 +20,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig index f593258806ad..5dbd83a1c11b 100644 --- a/arch/powerpc/configs/44x/eiger_defconfig +++ b/arch/powerpc/configs/44x/eiger_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index bae6b26bcfba..e49114f0e526 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -44,7 +44,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set CONFIG_VLAN_8021Q=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_CONNECTOR=y diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig index 4453a4590b1a..fa5378af44f9 100644 --- a/arch/powerpc/configs/44x/icon_defconfig +++ b/arch/powerpc/configs/44x/icon_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index d24bfa6ecd62..aae879c21239 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -33,7 +33,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git 
a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig index 5d3f685a7af8..56eddca998c6 100644 --- a/arch/powerpc/configs/44x/katmai_defconfig +++ b/arch/powerpc/configs/44x/katmai_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig index 7b8355a5698d..369bfd2e451d 100644 --- a/arch/powerpc/configs/44x/rainier_defconfig +++ b/arch/powerpc/configs/44x/rainier_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig index 918cfb63f0c8..8be95f6fe3a7 100644 --- a/arch/powerpc/configs/44x/redwood_defconfig +++ b/arch/powerpc/configs/44x/redwood_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 63302fbd184d..974a4f038cda 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -27,7 +27,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig index f34fee9464e5..10e517b69fa4 100644 --- a/arch/powerpc/configs/44x/sequoia_defconfig +++ b/arch/powerpc/configs/44x/sequoia_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig index 42cc7b4ed95f..cd08f3ddd609 100644 --- a/arch/powerpc/configs/44x/taishan_defconfig +++ b/arch/powerpc/configs/44x/taishan_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/44x/virtex5_defconfig b/arch/powerpc/configs/44x/virtex5_defconfig index 99cc3dc02df1..1f74079e1703 100644 --- a/arch/powerpc/configs/44x/virtex5_defconfig +++ b/arch/powerpc/configs/44x/virtex5_defconfig @@ -30,7 +30,6 @@ CONFIG_NETFILTER=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_MANGLE=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 6ae88d4879bf..af66c69c49fe 100644 --- 
a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_VLAN_8021Q=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig index 73948e88ac82..2412a6bf7ee6 100644 --- a/arch/powerpc/configs/52xx/cm5200_defconfig +++ b/arch/powerpc/configs/52xx/cm5200_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig index 6fc7f786c83c..63368e677506 100644 --- a/arch/powerpc/configs/52xx/lite5200b_defconfig +++ b/arch/powerpc/configs/52xx/lite5200b_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index ae2a1f74103b..72762da94846 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig index 1554de6968ca..303600ff1fdb 100644 --- a/arch/powerpc/configs/52xx/pcm030_defconfig +++ b/arch/powerpc/configs/52xx/pcm030_defconfig @@ -36,7 +36,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig index 0777e6efd22d..a3c8ca74032c 100644 --- a/arch/powerpc/configs/52xx/tqm5200_defconfig +++ b/arch/powerpc/configs/52xx/tqm5200_defconfig @@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig index dd884df32dfd..10192410b33c 100644 --- a/arch/powerpc/configs/83xx/asp8347_defconfig +++ b/arch/powerpc/configs/83xx/asp8347_defconfig @@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_REDBOOT_PARTS=y diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig index 9dffb2e7f735..16a42e2267fb 100644 --- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is 
not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig index a42232732c6d..80d40ae668eb 100644 --- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig index 4f914906ee4b..e94555452fb2 100644 --- a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig index a484eb8401e8..1715ff547442 100644 --- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig @@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig index 37f4d93b3f81..e65c0057147f 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CFI=y diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig index 7adb6708a761..17714bf0ed40 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CFI=y diff --git a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig index d7ce3551529d..e2ff684d8792 100644 --- a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig index 92134cee3f37..3eceb6db2982 100644 --- a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig index 97f7ea5f205f..093df33f9455 100644 --- 
a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig index ee7510a33d06..3f5e5d10789f 100644 --- a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig index 8966a9af4230..dad53ef86b49 100644 --- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig @@ -26,7 +26,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index d70b60314dad..920f37316fdb 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -65,7 +65,6 @@ CONFIG_INET6_AH=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NET_PKTGEN=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y diff --git a/arch/powerpc/configs/85xx/ksi8560_defconfig b/arch/powerpc/configs/85xx/ksi8560_defconfig index 9ce6f48cfb61..9cb211fb6d1e 100644 --- a/arch/powerpc/configs/85xx/ksi8560_defconfig +++ b/arch/powerpc/configs/85xx/ksi8560_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig index 5fbc3f904046..618e03e0706d 100644 --- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig index ff981d7905c7..9bc6283f2fb2 100644 --- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig index 974f0706d777..0683d8c292a8 100644 --- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig +++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set 
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig index 7e3e84a842e4..258881727119 100644 --- a/arch/powerpc/configs/85xx/sbc8548_defconfig +++ b/arch/powerpc/configs/85xx/sbc8548_defconfig @@ -22,7 +22,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig index 5b9cc01b9098..ecbcc853307d 100644 --- a/arch/powerpc/configs/85xx/stx_gp3_defconfig +++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig @@ -22,7 +22,6 @@ CONFIG_NETFILTER=y CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_NET_PKTGEN=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_PARPORT=m CONFIG_PARPORT_PC=m diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig index 1c63cbdc3211..afa1b9b633f8 100644 --- a/arch/powerpc/configs/85xx/tqm8548_defconfig +++ b/arch/powerpc/configs/85xx/tqm8548_defconfig @@ -29,7 +29,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CFI=y diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index 78f5beb2928c..d50aca608736 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -54,7 +54,6 @@ CONFIG_IP_PIMSM_V2=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 935ea3ade7de..f7a803ab2285 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -26,7 +26,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index 12f397d403c6..cf94d28d0e31 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig @@ -37,7 +37,6 @@ CONFIG_NETFILTER=y # CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set # CONFIG_NETFILTER_XT_MATCH_STATE is not set # CONFIG_IP_NF_MANGLE is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set CONFIG_PARPORT=y CONFIG_PARPORT_PC=y diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 560a93a84efe..2dd1b58a18ae 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -102,7 +102,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index 
a203b1cf67d3..9ff493dd8439 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -38,7 +38,6 @@ CONFIG_NETFILTER=y # CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set # CONFIG_NETFILTER_XT_MATCH_STATE is not set # CONFIG_IP_NF_MANGLE is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig index 2e6c8a45ae88..6e08d9502d89 100644 --- a/arch/powerpc/configs/ep8248e_defconfig +++ b/arch/powerpc/configs/ep8248e_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y CONFIG_NETFILTER=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index 7cb590e8f8fd..b20bd0cf3543 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -28,7 +28,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index d592ba27b122..3c7dad19a691 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -118,7 +118,6 @@ CONFIG_SYSVIPC=y CONFIG_TMPFS=y CONFIG_UBIFS_FS=y CONFIG_UDF_FS=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_UFS_FS=m CONFIG_UIO=y CONFIG_UNIX=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index ceb3c770786f..fbfcc85e4dc0 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -52,7 +52,6 @@ CONFIG_NF_CONNTRACK_IRC=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y @@ -244,7 +243,6 @@ CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -CONFIG_LATENCYTOP=y CONFIG_BOOTX_TEXT=y CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig index 805b0f87653c..85e73c3bd859 100644 --- a/arch/powerpc/configs/gamecube_defconfig +++ b/arch/powerpc/configs/gamecube_defconfig @@ -35,7 +35,6 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y @@ -91,7 +90,6 @@ CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y -CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_DMA_API_DEBUG=y CONFIG_PPC_EARLY_DEBUG=y diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig index 71d8d2430b6c..067f433c8f5e 100644 --- a/arch/powerpc/configs/holly_defconfig +++ b/arch/powerpc/configs/holly_defconfig @@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index 
477794c41d50..ea59f3d146df 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -48,7 +48,6 @@ CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index c5f2005005d3..2975e64629aa 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -36,7 +36,6 @@ CONFIG_IP_MULTICAST=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 # CONFIG_SCSI_PROC_FS is not set @@ -104,7 +103,6 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_LATENCYTOP=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_BOOTX_TEXT=y diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig index 5d5f08e5b8d9..6ce4f206eac7 100644 --- a/arch/powerpc/configs/mgcoge_defconfig +++ b/arch/powerpc/configs/mgcoge_defconfig @@ -30,7 +30,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_TIPC=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index e4bf8aa87e60..6203c1093a3a 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -35,7 +35,6 @@ CONFIG_CAN_VCAN=y CONFIG_CAN_MSCAN=y CONFIG_CAN_DEBUG_DEVICES=y # CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_PREVENT_FIRMWARE_BUILD is not set diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 7a2b2aa37def..6f87a5c74960 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -27,7 +27,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/mpc7448_hpc2_defconfig b/arch/powerpc/configs/mpc7448_hpc2_defconfig index 4b14c02b437c..19406a6c2648 100644 --- a/arch/powerpc/configs/mpc7448_hpc2_defconfig +++ b/arch/powerpc/configs/mpc7448_hpc2_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig index b1e88b64536b..00a4d2bf43b2 100644 --- a/arch/powerpc/configs/mpc8272_ads_defconfig +++ b/arch/powerpc/configs/mpc8272_ads_defconfig @@ -23,7 +23,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y CONFIG_NETFILTER=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index 005d00020fb9..be125729635c 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -37,7 +37,6 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y CONFIG_INET_ESP=y # CONFIG_IPV6 is not set 
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_FW_LOADER is not set diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index ec3fcc2bf737..285d506c5a76 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -27,7 +27,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index 63e38c7220f1..0a0d046fc445 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -58,7 +58,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_LAPB=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index c0423b2cf7c0..4b6d31d4474e 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -44,7 +44,6 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=y CONFIG_INET_ESP=y # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 50b610b48914..7e6654848531 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -112,7 +112,6 @@ CONFIG_BT_HCIBFUSB=m CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_LEDS=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set CONFIG_CONNECTOR=y CONFIG_MAC_FLOPPY=m @@ -293,7 +292,6 @@ CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_LATENCYTOP=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_BOOTX_TEXT=y diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig index ef2ef98d3f28..34219d555e8a 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -98,7 +98,6 @@ CONFIG_NET_ACT_BPF=m CONFIG_DNS_RESOLVER=y CONFIG_BPF_JIT=y # CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y @@ -317,7 +316,6 @@ CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y -CONFIG_LATENCYTOP=y CONFIG_FUNCTION_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index 689d7e276769..8f136b52198b 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -25,7 +25,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index db48039e0b11..67952819593e 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -36,7 +36,6 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # 
CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_BRIDGE=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 91fdb619b484..dc83fefa04f7 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -89,7 +89,7 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -# CONFIG_IPV6 is not set +CONFIG_IPV6=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_BRIDGE=m @@ -98,7 +98,6 @@ CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_BPF=m CONFIG_BPF_JIT=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_FD=y @@ -367,7 +366,6 @@ CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y CONFIG_DEBUG_MUTEXES=y -CONFIG_LATENCYTOP=y CONFIG_FUNCTION_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 41d85cb3c9a2..0d746774c2bd 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -50,7 +50,6 @@ CONFIG_INET_IPCOMP=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_BRIDGE=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_FD=y @@ -223,7 +222,6 @@ CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -CONFIG_LATENCYTOP=y CONFIG_IRQSOFF_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index 7c6baf6df139..9dca4cffa623 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -301,7 +301,6 @@ CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_IRDA=m CONFIG_IRLAN=m CONFIG_IRNET=m @@ -346,7 +345,6 @@ CONFIG_MAC80211_LEDS=y CONFIG_MAC80211_DEBUGFS=y CONFIG_NET_9P=m CONFIG_NET_9P_VIRTIO=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_PARPORT=m @@ -1124,6 +1122,7 @@ CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_DEBUG_INFO=y CONFIG_UNUSED_SYMBOLS=y +CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y @@ -1148,7 +1147,6 @@ CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y -CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig index 0ededa8c837d..9d8a76857c6f 100644 --- a/arch/powerpc/configs/pq2fads_defconfig +++ b/arch/powerpc/configs/pq2fads_defconfig @@ -24,7 +24,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_SYN_COOKIES=y CONFIG_NETFILTER=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index cf8d55f67272..314c63939816 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -63,7 +63,6 @@ CONFIG_CFG80211=m CONFIG_CFG80211_WEXT=y CONFIG_MAC80211=m # CONFIG_MAC80211_RC_MINSTREL is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y 
CONFIG_BLK_DEV_RAM_SIZE=65535 diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 62e12f61a3b2..38abc9c1770a 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -83,7 +83,6 @@ CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_BPF=m CONFIG_BPF_JIT=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_PARPORT=m @@ -290,7 +289,6 @@ CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HARDLOCKUP_DETECTOR=y -CONFIG_LATENCYTOP=y CONFIG_FUNCTION_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index a887616e35a2..557b530b2f70 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -68,7 +68,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_BEET is not set CONFIG_DNS_RESOLVER=y # CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=m diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index 74bca2eccd0f..6c39c52b8e4a 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -26,7 +26,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index cd72193fac0a..7493f36dd6e9 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -32,7 +32,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index f5c366b02828..5a04448ad6b5 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -41,7 +41,6 @@ CONFIG_BT_BNEP_MC_FILTER=y CONFIG_BT_HIDP=y CONFIG_CFG80211=y CONFIG_MAC80211=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y @@ -123,7 +122,6 @@ CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y -CONFIG_LATENCYTOP=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_DMA_API_DEBUG=y diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 52eafaf74054..31c231ea56b7 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -297,24 +297,24 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v) #define ATOMIC64_INIT(i) { (i) } -static __inline__ long atomic64_read(const atomic64_t *v) +static __inline__ s64 atomic64_read(const atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter)); return t; } -static __inline__ void atomic64_set(atomic64_t *v, long i) +static __inline__ void atomic64_set(atomic64_t *v, s64 i) { __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i)); } #define ATOMIC64_OP(op, asm_op) \ -static __inline__ void atomic64_##op(long a, atomic64_t *v) \ +static __inline__ void 
atomic64_##op(s64 a, atomic64_t *v) \ { \ - long t; \ + s64 t; \ \ __asm__ __volatile__( \ "1: ldarx %0,0,%3 # atomic64_" #op "\n" \ @@ -327,10 +327,10 @@ static __inline__ void atomic64_##op(long a, atomic64_t *v) \ } #define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \ -static inline long \ -atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ +static inline s64 \ +atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \ { \ - long t; \ + s64 t; \ \ __asm__ __volatile__( \ "1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \ @@ -345,10 +345,10 @@ atomic64_##op##_return_relaxed(long a, atomic64_t *v) \ } #define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \ -static inline long \ -atomic64_fetch_##op##_relaxed(long a, atomic64_t *v) \ +static inline s64 \ +atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \ { \ - long res, t; \ + s64 res, t; \ \ __asm__ __volatile__( \ "1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \ @@ -396,7 +396,7 @@ ATOMIC64_OPS(xor, xor) static __inline__ void atomic64_inc(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( "1: ldarx %0,0,%2 # atomic64_inc\n\ @@ -409,9 +409,9 @@ static __inline__ void atomic64_inc(atomic64_t *v) } #define atomic64_inc atomic64_inc -static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v) +static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( "1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n" @@ -427,7 +427,7 @@ static __inline__ long atomic64_inc_return_relaxed(atomic64_t *v) static __inline__ void atomic64_dec(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( "1: ldarx %0,0,%2 # atomic64_dec\n\ @@ -440,9 +440,9 @@ static __inline__ void atomic64_dec(atomic64_t *v) } #define atomic64_dec atomic64_dec -static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v) +static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( "1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n" @@ -463,9 +463,9 @@ static __inline__ long atomic64_dec_return_relaxed(atomic64_t *v) * Atomically test *v and decrement if it is greater than 0. * The function returns the old value of *v minus 1. */ -static __inline__ long atomic64_dec_if_positive(atomic64_t *v) +static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v) { - long t; + s64 t; __asm__ __volatile__( PPC_ATOMIC_ENTRY_BARRIER @@ -502,9 +502,9 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. 
*/ -static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) +static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long t; + s64 t; __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER @@ -534,7 +534,7 @@ static __inline__ long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) */ static __inline__ int atomic64_inc_not_zero(atomic64_t *v) { - long t1, t2; + s64 t1, t2; __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 74d24201fc4f..23b83d3593e2 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -116,8 +116,6 @@ typedef struct { /* Number of users of the external (Nest) MMU */ atomic_t copros; - /* NPU NMMU context */ - struct npu_context *npu_context; struct hash_mm_context *hash_context; unsigned long vdso_base; diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index ccf00a8b98c6..62e6ea0a7650 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -274,8 +274,15 @@ extern unsigned long __vmalloc_end; #define VMALLOC_START __vmalloc_start #define VMALLOC_END __vmalloc_end +static inline unsigned int ioremap_max_order(void) +{ + if (radix_enabled()) + return PUD_SHIFT; + return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */ +} +#define IOREMAP_MAX_ORDER ioremap_max_order() + extern unsigned long __kernel_virt_start; -extern unsigned long __kernel_virt_size; extern unsigned long __kernel_io_start; extern unsigned long __kernel_io_end; #define KERN_VIRT_START __kernel_virt_start @@ -1343,5 +1350,26 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va return false; } +/* + * Like pmd_huge() and pmd_large(), but works regardless of config options + */ +#define pmd_is_leaf pmd_is_leaf +static inline bool pmd_is_leaf(pmd_t pmd) +{ + return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)); +} + +#define pud_is_leaf pud_is_leaf +static inline bool pud_is_leaf(pud_t pud) +{ + return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE)); +} + +#define pgd_is_leaf pgd_is_leaf +static inline bool pgd_is_leaf(pgd_t pgd) +{ + return !!(pgd_raw(pgd) & cpu_to_be64(_PAGE_PTE)); +} + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 574eca33f893..e04a839cb5b9 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -266,6 +266,9 @@ extern void radix__vmemmap_remove_mapping(unsigned long start, extern int radix__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int psz); +extern int radix__ioremap_range(unsigned long ea, phys_addr_t pa, + unsigned long size, pgprot_t prot, int nid); + static inline unsigned long radix__get_tree_size(void) { unsigned long rts_field; diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 40ea5b3781c6..b3388d95f451 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -33,7 +33,8 @@ #define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT) -#if defined(__powerpc64__) && !defined(__ASSEMBLY__) +#if !defined(__ASSEMBLY__) +#ifdef CONFIG_PPC64 struct ppc_cache_info { u32 size; @@ -53,7 +54,28 @@ struct ppc64_caches { }; extern struct ppc64_caches ppc64_caches; -#endif /* __powerpc64__ && ! 
__ASSEMBLY__ */ + +static inline u32 l1_cache_shift(void) +{ + return ppc64_caches.l1d.log_block_size; +} + +static inline u32 l1_cache_bytes(void) +{ + return ppc64_caches.l1d.block_size; +} +#else +static inline u32 l1_cache_shift(void) +{ + return L1_CACHE_SHIFT; +} + +static inline u32 l1_cache_bytes(void) +{ + return L1_CACHE_BYTES; +} +#endif +#endif /* ! __ASSEMBLY__ */ #if defined(__ASSEMBLY__) /* @@ -85,22 +107,22 @@ extern void _set_L3CR(unsigned long); static inline void dcbz(void *addr) { - __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory"); + __asm__ __volatile__ ("dcbz %y0" : : "Z"(*(u8 *)addr) : "memory"); } static inline void dcbi(void *addr) { - __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory"); + __asm__ __volatile__ ("dcbi %y0" : : "Z"(*(u8 *)addr) : "memory"); } static inline void dcbf(void *addr) { - __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory"); + __asm__ __volatile__ ("dcbf %y0" : : "Z"(*(u8 *)addr) : "memory"); } static inline void dcbst(void *addr) { - __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory"); + __asm__ __volatile__ ("dcbst %y0" : : "Z"(*(u8 *)addr) : "memory"); } #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 74d60cfe8ce5..eef388f2659f 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -29,9 +29,12 @@ * not expect this type of fault. flush_cache_vmap is not exactly the right * place to put this, but it seems to work well enough. */ -#define flush_cache_vmap(start, end) do { asm volatile("ptesync" ::: "memory"); } while (0) +static inline void flush_cache_vmap(unsigned long start, unsigned long end) +{ + asm volatile("ptesync" ::: "memory"); +} #else -#define flush_cache_vmap(start, end) do { } while (0) +static inline void flush_cache_vmap(unsigned long start, unsigned long end) { } #endif #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 @@ -54,20 +57,29 @@ static inline void __flush_dcache_icache_phys(unsigned long physaddr) } #endif -#ifdef CONFIG_PPC32 /* * Write any modified data cache blocks out to memory and invalidate them. * Does not invalidate the corresponding instruction cache blocks. 
*/ static inline void flush_dcache_range(unsigned long start, unsigned long stop) { - void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1)); - unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1); + unsigned long shift = l1_cache_shift(); + unsigned long bytes = l1_cache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; - for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES) + if (IS_ENABLED(CONFIG_PPC64)) { + mb(); /* sync */ + isync(); + } + + for (i = 0; i < size >> shift; i++, addr += bytes) dcbf(addr); mb(); /* sync */ + + if (IS_ENABLED(CONFIG_PPC64)) + isync(); } /* @@ -77,11 +89,13 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop) */ static inline void clean_dcache_range(unsigned long start, unsigned long stop) { - void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1)); - unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1); + unsigned long shift = l1_cache_shift(); + unsigned long bytes = l1_cache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; - for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES) + for (i = 0; i < size >> shift; i++, addr += bytes) dcbst(addr); mb(); /* sync */ } @@ -94,21 +108,17 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop) static inline void invalidate_dcache_range(unsigned long start, unsigned long stop) { - void *addr = (void *)(start & ~(L1_CACHE_BYTES - 1)); - unsigned long size = stop - (unsigned long)addr + (L1_CACHE_BYTES - 1); + unsigned long shift = l1_cache_shift(); + unsigned long bytes = l1_cache_bytes(); + void *addr = (void *)(start & ~(bytes - 1)); + unsigned long size = stop - (unsigned long)addr + (bytes - 1); unsigned long i; - for (i = 0; i < size >> L1_CACHE_SHIFT; i++, addr += L1_CACHE_BYTES) + for (i = 0; i < size >> shift; i++, addr += bytes) dcbi(addr); mb(); /* sync */ } -#endif /* CONFIG_PPC32 */ -#ifdef CONFIG_PPC64 -extern void flush_dcache_range(unsigned long start, unsigned long stop); -extern void flush_inval_dcache_range(unsigned long start, unsigned long stop); -#endif - #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ do { \ memcpy(dst, src, len); \ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 841a0be6c1b2..33f4f72eb035 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -30,25 +30,13 @@ * exception handlers (including pSeries LPAR) and iSeries LPAR * implementations as possible. */ -#include <asm/head-64.h> #include <asm/feature-fixups.h> -/* PACA save area offsets (exgen, exmc, etc) */ -#define EX_R9 0 -#define EX_R10 8 -#define EX_R11 16 -#define EX_R12 24 -#define EX_R13 32 -#define EX_DAR 40 -#define EX_DSISR 48 -#define EX_CCR 52 -#define EX_CFAR 56 -#define EX_PPR 64 +/* PACA save area size in u64 units (exgen, exmc, etc) */ #if defined(CONFIG_RELOCATABLE) -#define EX_CTR 72 -#define EX_SIZE 10 /* size in u64 units */ +#define EX_SIZE 10 #else -#define EX_SIZE 9 /* size in u64 units */ +#define EX_SIZE 9 #endif /* @@ -56,12 +44,7 @@ */ #define MAX_MCE_DEPTH 4 -/* - * EX_R3 is only used by the bad_stack handler. bad_stack reloads and - * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap - * with EX_DAR. 
- */ -#define EX_R3 EX_DAR +#ifdef __ASSEMBLY__ #define STF_ENTRY_BARRIER_SLOT \ STF_ENTRY_BARRIER_FIXUP_SECTION; \ @@ -144,588 +127,6 @@ hrfid; \ b hrfi_flush_fallback -#ifdef CONFIG_RELOCATABLE -#define __EXCEPTION_PROLOG_2_RELON(label, h) \ - mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ - LOAD_HANDLER(r12,label); \ - mtctr r12; \ - mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ - li r10,MSR_RI; \ - mtmsrd r10,1; /* Set RI (EE=0) */ \ - bctr; -#else -/* If not relocatable, we can jump directly -- and save messing with LR */ -#define __EXCEPTION_PROLOG_2_RELON(label, h) \ - mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ - mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ - li r10,MSR_RI; \ - mtmsrd r10,1; /* Set RI (EE=0) */ \ - b label; -#endif -#define EXCEPTION_PROLOG_2_RELON(label, h) \ - __EXCEPTION_PROLOG_2_RELON(label, h) - -/* - * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to - * rfid. Save LR in case we're CONFIG_RELOCATABLE, in which case - * EXCEPTION_PROLOG_2_RELON will be using LR. - */ -#define EXCEPTION_RELON_PROLOG(area, label, h, extra, vec) \ - SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0(area); \ - EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2_RELON(label, h) - -/* - * We're short on space and time in the exception prolog, so we can't - * use the normal LOAD_REG_IMMEDIATE macro to load the address of label. - * Instead we get the base of the kernel from paca->kernelbase and or in the low - * part of label. This requires that the label be within 64KB of kernelbase, and - * that kernelbase be 64K aligned. - */ -#define LOAD_HANDLER(reg, label) \ - ld reg,PACAKBASE(r13); /* get high part of &label */ \ - ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label); - -#define __LOAD_HANDLER(reg, label) \ - ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l; - -/* - * Branches from unrelocated code (e.g., interrupts) to labels outside - * head-y require >64K offsets. - */ -#define __LOAD_FAR_HANDLER(reg, label) \ - ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l; \ - addis reg,reg,(ABS_ADDR(label))@h; - -/* Exception register prefixes */ -#define EXC_HV H -#define EXC_STD - -#if defined(CONFIG_RELOCATABLE) -/* - * If we support interrupts with relocation on AND we're a relocatable kernel, - * we need to use CTR to get to the 2nd level handler. So, save/restore it - * when required. - */ -#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13) -#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13) -#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg -#else -/* ...else CTR is unused and in register. 
*/ -#define SAVE_CTR(reg, area) -#define GET_CTR(reg, area) mfctr reg -#define RESTORE_CTR(reg, area) -#endif - -/* - * PPR save/restore macros used in exceptions_64s.S - * Used for P7 or later processors - */ -#define SAVE_PPR(area, ra) \ -BEGIN_FTR_SECTION_NESTED(940) \ - ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \ - std ra,_PPR(r1); \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940) - -#define RESTORE_PPR_PACA(area, ra) \ -BEGIN_FTR_SECTION_NESTED(941) \ - ld ra,area+EX_PPR(r13); \ - mtspr SPRN_PPR,ra; \ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941) - -/* - * Get an SPR into a register if the CPU has the given feature - */ -#define OPT_GET_SPR(ra, spr, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - mfspr ra,spr; \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -/* - * Set an SPR from a register if the CPU has the given feature - */ -#define OPT_SET_SPR(ra, spr, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - mtspr spr,ra; \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -/* - * Save a register to the PACA if the CPU has the given feature - */ -#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - std ra,offset(r13); \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -#define EXCEPTION_PROLOG_0(area) \ - GET_PACA(r13); \ - std r9,area+EX_R9(r13); /* save r9 */ \ - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ - HMT_MEDIUM; \ - std r10,area+EX_R10(r13); /* save r10 - r12 */ \ - OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) - -#define __EXCEPTION_PROLOG_1_PRE(area) \ - OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ - OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ - INTERRUPT_TO_KERNEL; \ - SAVE_CTR(r10, area); \ - mfcr r9; - -#define __EXCEPTION_PROLOG_1_POST(area) \ - std r11,area+EX_R11(r13); \ - std r12,area+EX_R12(r13); \ - GET_SCRATCH0(r10); \ - std r10,area+EX_R13(r13) - -/* - * This version of the EXCEPTION_PROLOG_1 will carry - * addition parameter called "bitmask" to support - * checking of the interrupt maskable level in the SOFTEN_TEST. - * Intended to be used in MASKABLE_EXCPETION_* macros. - */ -#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \ - __EXCEPTION_PROLOG_1_PRE(area); \ - extra(vec, bitmask); \ - __EXCEPTION_PROLOG_1_POST(area); - -/* - * This version of the EXCEPTION_PROLOG_1 is intended - * to be used in STD_EXCEPTION* macros - */ -#define _EXCEPTION_PROLOG_1(area, extra, vec) \ - __EXCEPTION_PROLOG_1_PRE(area); \ - extra(vec); \ - __EXCEPTION_PROLOG_1_POST(area); - -#define EXCEPTION_PROLOG_1(area, extra, vec) \ - _EXCEPTION_PROLOG_1(area, extra, vec) - -#define __EXCEPTION_PROLOG_2(label, h) \ - ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ - mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ - LOAD_HANDLER(r12,label) \ - mtspr SPRN_##h##SRR0,r12; \ - mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ - mtspr SPRN_##h##SRR1,r10; \ - h##RFI_TO_KERNEL; \ - b . /* prevent speculative execution */ -#define EXCEPTION_PROLOG_2(label, h) \ - __EXCEPTION_PROLOG_2(label, h) - -/* _NORI variant keeps MSR_RI clear */ -#define __EXCEPTION_PROLOG_2_NORI(label, h) \ - ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ - xori r10,r10,MSR_RI; /* Clear MSR_RI */ \ - mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ - LOAD_HANDLER(r12,label) \ - mtspr SPRN_##h##SRR0,r12; \ - mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ - mtspr SPRN_##h##SRR1,r10; \ - h##RFI_TO_KERNEL; \ - b . 
/* prevent speculative execution */ - -#define EXCEPTION_PROLOG_2_NORI(label, h) \ - __EXCEPTION_PROLOG_2_NORI(label, h) - -#define EXCEPTION_PROLOG(area, label, h, extra, vec) \ - SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0(area); \ - EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2(label, h); - -#define __KVMTEST(h, n) \ - lbz r10,HSTATE_IN_GUEST(r13); \ - cmpwi r10,0; \ - bne do_kvm_##h##n - -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -/* - * If hv is possible, interrupts come into to the hv version - * of the kvmppc_interrupt code, which then jumps to the PR handler, - * kvmppc_interrupt_pr, if the guest is a PR guest. - */ -#define kvmppc_interrupt kvmppc_interrupt_hv -#else -#define kvmppc_interrupt kvmppc_interrupt_pr -#endif - -/* - * Branch to label using its 0xC000 address. This results in instruction - * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned - * on using mtmsr rather than rfid. - * - * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than - * load KBASE for a slight optimisation. - */ -#define BRANCH_TO_C000(reg, label) \ - __LOAD_HANDLER(reg, label); \ - mtctr reg; \ - bctr - -#ifdef CONFIG_RELOCATABLE -#define BRANCH_TO_COMMON(reg, label) \ - __LOAD_HANDLER(reg, label); \ - mtctr reg; \ - bctr - -#define BRANCH_LINK_TO_FAR(label) \ - __LOAD_FAR_HANDLER(r12, label); \ - mtctr r12; \ - bctrl - -/* - * KVM requires __LOAD_FAR_HANDLER. - * - * __BRANCH_TO_KVM_EXIT branches are also a special case because they - * explicitly use r9 then reload it from PACA before branching. Hence - * the double-underscore. - */ -#define __BRANCH_TO_KVM_EXIT(area, label) \ - mfctr r9; \ - std r9,HSTATE_SCRATCH1(r13); \ - __LOAD_FAR_HANDLER(r9, label); \ - mtctr r9; \ - ld r9,area+EX_R9(r13); \ - bctr - -#else -#define BRANCH_TO_COMMON(reg, label) \ - b label - -#define BRANCH_LINK_TO_FAR(label) \ - bl label - -#define __BRANCH_TO_KVM_EXIT(area, label) \ - ld r9,area+EX_R9(r13); \ - b label - -#endif - -/* Do not enable RI */ -#define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec) \ - EXCEPTION_PROLOG_0(area); \ - EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2_NORI(label, h); - - -#define __KVM_HANDLER(area, h, n) \ - BEGIN_FTR_SECTION_NESTED(947) \ - ld r10,area+EX_CFAR(r13); \ - std r10,HSTATE_CFAR(r13); \ - END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947); \ - BEGIN_FTR_SECTION_NESTED(948) \ - ld r10,area+EX_PPR(r13); \ - std r10,HSTATE_PPR(r13); \ - END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ - ld r10,area+EX_R10(r13); \ - std r12,HSTATE_SCRATCH0(r13); \ - sldi r12,r9,32; \ - ori r12,r12,(n); \ - /* This reloads r9 before branching to kvmppc_interrupt */ \ - __BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt) - -#define __KVM_HANDLER_SKIP(area, h, n) \ - cmpwi r10,KVM_GUEST_MODE_SKIP; \ - beq 89f; \ - BEGIN_FTR_SECTION_NESTED(948) \ - ld r10,area+EX_PPR(r13); \ - std r10,HSTATE_PPR(r13); \ - END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ - ld r10,area+EX_R10(r13); \ - std r12,HSTATE_SCRATCH0(r13); \ - sldi r12,r9,32; \ - ori r12,r12,(n); \ - /* This reloads r9 before branching to kvmppc_interrupt */ \ - __BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt); \ -89: mtocrf 0x80,r9; \ - ld r9,area+EX_R9(r13); \ - ld r10,area+EX_R10(r13); \ - b kvmppc_skip_##h##interrupt - -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#define KVMTEST(h, n) __KVMTEST(h, n) -#define KVM_HANDLER(area, h, n) __KVM_HANDLER(area, h, n) -#define KVM_HANDLER_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n) - -#else 
-#define KVMTEST(h, n) -#define KVM_HANDLER(area, h, n) -#define KVM_HANDLER_SKIP(area, h, n) -#endif - -#define NOTEST(n) - -#define EXCEPTION_PROLOG_COMMON_1() \ - std r9,_CCR(r1); /* save CR in stackframe */ \ - std r11,_NIP(r1); /* save SRR0 in stackframe */ \ - std r12,_MSR(r1); /* save SRR1 in stackframe */ \ - std r10,0(r1); /* make stack chain pointer */ \ - std r0,GPR0(r1); /* save r0 in stackframe */ \ - std r10,GPR1(r1); /* save r1 in stackframe */ \ - - -/* - * The common exception prolog is used for all except a few exceptions - * such as a segment miss on a kernel address. We have to be prepared - * to take another exception from the point where we first touch the - * kernel stack onwards. - * - * On entry r13 points to the paca, r9-r13 are saved in the paca, - * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and - * SRR1, and relocation is on. - */ -#define EXCEPTION_PROLOG_COMMON(n, area) \ - andi. r10,r12,MSR_PR; /* See if coming from user */ \ - mr r10,r1; /* Save r1 */ \ - subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ - beq- 1f; \ - ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ -1: cmpdi cr1,r1,-INT_FRAME_SIZE; /* check if r1 is in userspace */ \ - blt+ cr1,3f; /* abort if it is */ \ - li r1,(n); /* will be reloaded later */ \ - sth r1,PACA_TRAP_SAVE(r13); \ - std r3,area+EX_R3(r13); \ - addi r3,r13,area; /* r3 -> where regs are saved*/ \ - RESTORE_CTR(r1, area); \ - b bad_stack; \ -3: EXCEPTION_PROLOG_COMMON_1(); \ - kuap_save_amr_and_lock r9, r10, cr1, cr0; \ - beq 4f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ - SAVE_PPR(area, r9); \ -4: EXCEPTION_PROLOG_COMMON_2(area) \ - EXCEPTION_PROLOG_COMMON_3(n) \ - ACCOUNT_STOLEN_TIME - -/* Save original regs values from save area to stack frame. */ -#define EXCEPTION_PROLOG_COMMON_2(area) \ - ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \ - ld r10,area+EX_R10(r13); \ - std r9,GPR9(r1); \ - std r10,GPR10(r1); \ - ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \ - ld r10,area+EX_R12(r13); \ - ld r11,area+EX_R13(r13); \ - std r9,GPR11(r1); \ - std r10,GPR12(r1); \ - std r11,GPR13(r1); \ - BEGIN_FTR_SECTION_NESTED(66); \ - ld r10,area+EX_CFAR(r13); \ - std r10,ORIG_GPR3(r1); \ - END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ - GET_CTR(r10, area); \ - std r10,_CTR(r1); - -#define EXCEPTION_PROLOG_COMMON_3(n) \ - std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - mflr r9; /* Get LR, later save to stack */ \ - ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ - std r9,_LINK(r1); \ - lbz r10,PACAIRQSOFTMASK(r13); \ - mfspr r11,SPRN_XER; /* save XER in stackframe */ \ - std r10,SOFTE(r1); \ - std r11,_XER(r1); \ - li r9,(n)+1; \ - std r9,_TRAP(r1); /* set trap number */ \ - li r10,0; \ - ld r11,exception_marker@toc(r2); \ - std r10,RESULT(r1); /* clear regs->result */ \ - std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ - -/* - * Exception vectors. 
- */ -#define STD_EXCEPTION(vec, label) \ - EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_STD, KVMTEST_PR, vec); - -/* Version of above for when we have to branch out-of-line */ -#define __OOL_EXCEPTION(vec, label, hdlr) \ - SET_SCRATCH0(r13) \ - EXCEPTION_PROLOG_0(PACA_EXGEN) \ - b hdlr; - -#define STD_EXCEPTION_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \ - EXCEPTION_PROLOG_2(label, EXC_STD) - -#define STD_EXCEPTION_HV(loc, vec, label) \ - EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_HV, KVMTEST_HV, vec); - -#define STD_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \ - EXCEPTION_PROLOG_2(label, EXC_HV) - -#define STD_RELON_EXCEPTION(loc, vec, label) \ - /* No guest interrupts come through here */ \ - EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_STD, NOTEST, vec); - -#define STD_RELON_EXCEPTION_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \ - EXCEPTION_PROLOG_2_RELON(label, EXC_STD) - -#define STD_RELON_EXCEPTION_HV(loc, vec, label) \ - EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_HV, KVMTEST_HV, vec); - -#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec); \ - EXCEPTION_PROLOG_2_RELON(label, EXC_HV) - -/* This associate vector numbers with bits in paca->irq_happened */ -#define SOFTEN_VALUE_0x500 PACA_IRQ_EE -#define SOFTEN_VALUE_0x900 PACA_IRQ_DEC -#define SOFTEN_VALUE_0x980 PACA_IRQ_DEC -#define SOFTEN_VALUE_0xa00 PACA_IRQ_DBELL -#define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL -#define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI -#define SOFTEN_VALUE_0xea0 PACA_IRQ_EE -#define SOFTEN_VALUE_0xf00 PACA_IRQ_PMI - -#define __SOFTEN_TEST(h, vec, bitmask) \ - lbz r10,PACAIRQSOFTMASK(r13); \ - andi. r10,r10,bitmask; \ - li r10,SOFTEN_VALUE_##vec; \ - bne masked_##h##interrupt - -#define _SOFTEN_TEST(h, vec, bitmask) __SOFTEN_TEST(h, vec, bitmask) - -#define SOFTEN_TEST_PR(vec, bitmask) \ - KVMTEST(EXC_STD, vec); \ - _SOFTEN_TEST(EXC_STD, vec, bitmask) - -#define SOFTEN_TEST_HV(vec, bitmask) \ - KVMTEST(EXC_HV, vec); \ - _SOFTEN_TEST(EXC_HV, vec, bitmask) - -#define KVMTEST_PR(vec) \ - KVMTEST(EXC_STD, vec) - -#define KVMTEST_HV(vec) \ - KVMTEST(EXC_HV, vec) - -#define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask) -#define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask) - -#define __MASKABLE_EXCEPTION(vec, label, h, extra, bitmask) \ - SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0(PACA_EXGEN); \ - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ - EXCEPTION_PROLOG_2(label, h); - -#define MASKABLE_EXCEPTION(vec, label, bitmask) \ - __MASKABLE_EXCEPTION(vec, label, EXC_STD, SOFTEN_TEST_PR, bitmask) - -#define MASKABLE_EXCEPTION_OOL(vec, label, bitmask) \ - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\ - EXCEPTION_PROLOG_2(label, EXC_STD) - -#define MASKABLE_EXCEPTION_HV(vec, label, bitmask) \ - __MASKABLE_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask) - -#define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \ - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ - EXCEPTION_PROLOG_2(label, EXC_HV) - -#define __MASKABLE_RELON_EXCEPTION(vec, label, h, extra, bitmask) \ - SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0(PACA_EXGEN); \ - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ - EXCEPTION_PROLOG_2_RELON(label, h) - -#define MASKABLE_RELON_EXCEPTION(vec, label, bitmask) \ - __MASKABLE_RELON_EXCEPTION(vec, label, EXC_STD, SOFTEN_NOTEST_PR, bitmask) - 
-#define MASKABLE_RELON_EXCEPTION_OOL(vec, label, bitmask) \ - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\ - EXCEPTION_PROLOG_2(label, EXC_STD); - -#define MASKABLE_RELON_EXCEPTION_HV(vec, label, bitmask) \ - __MASKABLE_RELON_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask) - -#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ - MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ - EXCEPTION_PROLOG_2_RELON(label, EXC_HV) - -/* - * Our exception common code can be passed various "additions" - * to specify the behaviour of interrupts, whether to kick the - * runlatch, etc... - */ - -/* - * This addition reconciles our actual IRQ state with the various software - * flags that track it. This may call C code. - */ -#define ADD_RECONCILE RECONCILE_IRQ_STATE(r10,r11) - -#define ADD_NVGPRS \ - bl save_nvgprs - -#define RUNLATCH_ON \ -BEGIN_FTR_SECTION \ - ld r3, PACA_THREAD_INFO(r13); \ - ld r4,TI_LOCAL_FLAGS(r3); \ - andi. r0,r4,_TLF_RUNLATCH; \ - beql ppc64_runlatch_on_trampoline; \ -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) - -#define EXCEPTION_COMMON(area, trap, label, hdlr, ret, additions) \ - EXCEPTION_PROLOG_COMMON(trap, area); \ - /* Volatile regs are potentially clobbered here */ \ - additions; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b ret - -/* - * Exception where stack is already set in r1, r1 is saved in r10, and it - * continues rather than returns. - */ -#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \ - EXCEPTION_PROLOG_COMMON_1(); \ - kuap_save_amr_and_lock r9, r10, cr1; \ - EXCEPTION_PROLOG_COMMON_2(area); \ - EXCEPTION_PROLOG_COMMON_3(trap); \ - /* Volatile regs are potentially clobbered here */ \ - additions; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr - -#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ - EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \ - ret_from_except, ADD_NVGPRS;ADD_RECONCILE) - -/* - * Like STD_EXCEPTION_COMMON, but for exceptions that can occur - * in the idle task and therefore need the special idle handling - * (finish nap and runlatch) - */ -#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ - EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \ - ret_from_except_lite, FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON) - -/* - * When the idle code in power4_idle puts the CPU into NAP mode, - * it has to do so in a loop, and relies on the external interrupt - * and decrementer interrupt entry code to get it out of the loop. - * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags - * to signal that it is in the loop and needs help to get out. - */ -#ifdef CONFIG_PPC_970_NAP -#define FINISH_NAP \ -BEGIN_FTR_SECTION \ - ld r11, PACA_THREAD_INFO(r13); \ - ld r9,TI_LOCAL_FLAGS(r11); \ - andi. r10,r9,_TLF_NAPPING; \ - bnel power4_fixup_nap; \ -END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) -#else -#define FINISH_NAP -#endif +#endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index a4f947888744..a466765709a9 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -169,53 +169,6 @@ name: #define ABS_ADDR(label) (label - fs_label + fs_start) -/* - * Following are the BOOK3S exception handler helper macros. - * Handlers come in a number of types, and each type has a number of varieties. 
- * - * EXC_REAL_* - real, unrelocated exception vectors - * EXC_VIRT_* - virt (AIL), unrelocated exception vectors - * TRAMP_REAL_* - real, unrelocated helpers (virt can call these) - * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use) - * TRAMP_KVM - KVM handlers that get put into real, unrelocated - * EXC_COMMON - virt, relocated common handlers - * - * The EXC handlers are given a name, and branch to name_common, or the - * appropriate KVM or masking function. Vector handler verieties are as - * follows: - * - * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception - * - * EXC_{REAL|VIRT} - standard exception - * - * EXC_{REAL|VIRT}_suffix - * where _suffix is: - * - _MASKABLE - maskable exception - * - _OOL - out of line with trampoline to common handler - * - _HV - HV exception - * - * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV - * - * The one unusual case is __EXC_REAL_OOL_HV_DIRECT, which is - * an OOL vector that branches to a specified handler rather than the usual - * trampoline that goes to common. It, and other underscore macros, should - * be used with care. - * - * KVM handlers come in the following verieties: - * TRAMP_KVM - * TRAMP_KVM_SKIP - * TRAMP_KVM_HV - * TRAMP_KVM_HV_SKIP - * - * COMMON handlers come in the following verieties: - * EXC_COMMON_BEGIN/END - used to open-code the handler - * EXC_COMMON - * EXC_COMMON_ASYNC - * - * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM - * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers. - */ - #define EXC_REAL_BEGIN(name, start, size) \ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size) @@ -255,162 +208,7 @@ name: #define EXC_VIRT_NONE(start, size) \ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \ - FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); - - -#define EXC_REAL(name, start, size) \ - EXC_REAL_BEGIN(name, start, size); \ - STD_EXCEPTION(start, name##_common); \ - EXC_REAL_END(name, start, size); - -#define EXC_VIRT(name, start, size, realvec) \ - EXC_VIRT_BEGIN(name, start, size); \ - STD_RELON_EXCEPTION(start, realvec, name##_common); \ - EXC_VIRT_END(name, start, size); - -#define EXC_REAL_MASKABLE(name, start, size, bitmask) \ - EXC_REAL_BEGIN(name, start, size); \ - MASKABLE_EXCEPTION(start, name##_common, bitmask); \ - EXC_REAL_END(name, start, size); - -#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \ - EXC_VIRT_BEGIN(name, start, size); \ - MASKABLE_RELON_EXCEPTION(realvec, name##_common, bitmask); \ - EXC_VIRT_END(name, start, size); - -#define EXC_REAL_HV(name, start, size) \ - EXC_REAL_BEGIN(name, start, size); \ - STD_EXCEPTION_HV(start, start, name##_common); \ - EXC_REAL_END(name, start, size); - -#define EXC_VIRT_HV(name, start, size, realvec) \ - EXC_VIRT_BEGIN(name, start, size); \ - STD_RELON_EXCEPTION_HV(start, realvec, name##_common); \ - EXC_VIRT_END(name, start, size); - -#define __EXC_REAL_OOL(name, start, size) \ - EXC_REAL_BEGIN(name, start, size); \ - __OOL_EXCEPTION(start, label, tramp_real_##name); \ - EXC_REAL_END(name, start, size); - -#define __TRAMP_REAL_OOL(name, vec) \ - TRAMP_REAL_BEGIN(tramp_real_##name); \ - STD_EXCEPTION_OOL(vec, name##_common); - -#define EXC_REAL_OOL(name, start, size) \ - __EXC_REAL_OOL(name, start, size); \ - __TRAMP_REAL_OOL(name, start); - -#define __EXC_REAL_OOL_MASKABLE(name, start, size) \ - __EXC_REAL_OOL(name, start, size); - -#define 
__TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \ - TRAMP_REAL_BEGIN(tramp_real_##name); \ - MASKABLE_EXCEPTION_OOL(vec, name##_common, bitmask); - -#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \ - __EXC_REAL_OOL_MASKABLE(name, start, size); \ - __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask); - -#define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \ - EXC_REAL_BEGIN(name, start, size); \ - __OOL_EXCEPTION(start, label, handler); \ - EXC_REAL_END(name, start, size); - -#define __EXC_REAL_OOL_HV(name, start, size) \ - __EXC_REAL_OOL(name, start, size); - -#define __TRAMP_REAL_OOL_HV(name, vec) \ - TRAMP_REAL_BEGIN(tramp_real_##name); \ - STD_EXCEPTION_HV_OOL(vec, name##_common); \ - -#define EXC_REAL_OOL_HV(name, start, size) \ - __EXC_REAL_OOL_HV(name, start, size); \ - __TRAMP_REAL_OOL_HV(name, start); - -#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \ - __EXC_REAL_OOL(name, start, size); - -#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \ - TRAMP_REAL_BEGIN(tramp_real_##name); \ - MASKABLE_EXCEPTION_HV_OOL(vec, name##_common, bitmask); \ - -#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \ - __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \ - __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask); - -#define __EXC_VIRT_OOL(name, start, size) \ - EXC_VIRT_BEGIN(name, start, size); \ - __OOL_EXCEPTION(start, label, tramp_virt_##name); \ - EXC_VIRT_END(name, start, size); - -#define __TRAMP_VIRT_OOL(name, realvec) \ - TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - STD_RELON_EXCEPTION_OOL(realvec, name##_common); - -#define EXC_VIRT_OOL(name, start, size, realvec) \ - __EXC_VIRT_OOL(name, start, size); \ - __TRAMP_VIRT_OOL(name, realvec); - -#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \ - __EXC_VIRT_OOL(name, start, size); - -#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \ - TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - MASKABLE_RELON_EXCEPTION_OOL(realvec, name##_common, bitmask); - -#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \ - __EXC_VIRT_OOL_MASKABLE(name, start, size); \ - __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask); - -#define __EXC_VIRT_OOL_HV(name, start, size) \ - __EXC_VIRT_OOL(name, start, size); - -#define __TRAMP_VIRT_OOL_HV(name, realvec) \ - TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - STD_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \ - -#define EXC_VIRT_OOL_HV(name, start, size, realvec) \ - __EXC_VIRT_OOL_HV(name, start, size); \ - __TRAMP_VIRT_OOL_HV(name, realvec); - -#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \ - __EXC_VIRT_OOL(name, start, size); - -#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \ - TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common, bitmask);\ - -#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \ - __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \ - __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask); - -#define TRAMP_KVM(area, n) \ - TRAMP_KVM_BEGIN(do_kvm_##n); \ - KVM_HANDLER(area, EXC_STD, n); \ - -#define TRAMP_KVM_SKIP(area, n) \ - TRAMP_KVM_BEGIN(do_kvm_##n); \ - KVM_HANDLER_SKIP(area, EXC_STD, n); \ - -/* - * HV variant exceptions get the 0x2 bit added to their trap number. 
- */ -#define TRAMP_KVM_HV(area, n) \ - TRAMP_KVM_BEGIN(do_kvm_H##n); \ - KVM_HANDLER(area, EXC_HV, n + 0x2); \ - -#define TRAMP_KVM_HV_SKIP(area, n) \ - TRAMP_KVM_BEGIN(do_kvm_H##n); \ - KVM_HANDLER_SKIP(area, EXC_HV, n + 0x2); \ - -#define EXC_COMMON(name, realvec, hdlr) \ - EXC_COMMON_BEGIN(name); \ - STD_EXCEPTION_COMMON(realvec, name, hdlr); \ - -#define EXC_COMMON_ASYNC(name, realvec, hdlr) \ - EXC_COMMON_BEGIN(name); \ - STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr); \ + FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size) #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 78202d5fb13a..67e2da195eae 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -76,18 +76,25 @@ static inline void hw_breakpoint_disable(void) extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); int hw_breakpoint_handler(struct die_args *args); -extern int set_dawr(struct arch_hw_breakpoint *brk); +#else /* CONFIG_HAVE_HW_BREAKPOINT */ +static inline void hw_breakpoint_disable(void) { } +static inline void thread_change_pc(struct task_struct *tsk, + struct pt_regs *regs) { } + +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + + +#ifdef CONFIG_PPC_DAWR extern bool dawr_force_enable; static inline bool dawr_enabled(void) { return dawr_force_enable; } - -#else /* CONFIG_HAVE_HW_BREAKPOINT */ -static inline void hw_breakpoint_disable(void) { } -static inline void thread_change_pc(struct task_struct *tsk, - struct pt_regs *regs) { } +int set_dawr(struct arch_hw_breakpoint *brk); +#else static inline bool dawr_enabled(void) { return false; } -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +static inline int set_dawr(struct arch_hw_breakpoint *brk) { return -1; } +#endif + #endif /* __KERNEL__ */ #endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */ diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 2c1845e5e851..18d342b815e4 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -314,13 +314,5 @@ extern bool iommu_fixed_is_weak; extern const struct dma_map_ops dma_iommu_ops; -static inline unsigned long device_to_mask(struct device *dev) -{ - if (dev->dma_mask && *dev->dma_mask) - return *dev->dma_mask; - /* Assume devices without mask can take 32 bit addresses */ - return 0xfffffffful; -} - #endif /* __KERNEL__ */ #endif /* _ASM_IOMMU_H */ diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 806494283e2a..3b4b305796ae 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -5,6 +5,29 @@ */ #ifndef _ASM_POWERPC_LPPACA_H #define _ASM_POWERPC_LPPACA_H + +/* + * The below VPHN macros are outside the __KERNEL__ check since these are + * used for compiling the vphn selftest in userspace + */ + +/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */ +#define VPHN_REGISTER_COUNT 6 + +/* + * 6 64-bit registers unpacked into up to 24 be32 associativity values. To + * form the complete property we have to add the length in the first cell. 
+ */ +#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1) + +/* + * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags: + * 1 for retrieving associativity information for a guest cpu + * 2 for retrieving associativity information for a host/hypervisor cpu + */ +#define VPHN_FLAG_VCPU 1 +#define VPHN_FLAG_PCPU 2 + #ifdef __KERNEL__ /* @@ -19,6 +42,7 @@ */ #include <linux/cache.h> #include <linux/threads.h> +#include <linux/spinlock_types.h> #include <asm/types.h> #include <asm/mmu.h> #include <asm/firmware.h> @@ -141,7 +165,19 @@ struct dtl_entry { #define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ #define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) +/* + * Dispatch trace log event enable mask: + * 0x1: voluntary virtual processor waits + * 0x2: time-slice preempts + * 0x4: virtual partition memory page faults + */ +#define DTL_LOG_CEDE 0x1 +#define DTL_LOG_PREEMPT 0x2 +#define DTL_LOG_FAULT 0x4 +#define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT) + extern struct kmem_cache *dtl_cache; +extern rwlock_t dtl_access_lock; /* * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls @@ -151,6 +187,10 @@ extern struct kmem_cache *dtl_cache; */ extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); +extern void register_dtl_buffer(int cpu); +extern void alloc_dtl_buffers(unsigned long *time_limit); +extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); + #endif /* CONFIG_PPC_BOOK3S */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LPPACA_H */ diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 09a8553833d1..383242eb0dea 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -564,6 +564,7 @@ enum OpalHMI_XstopType { CHECKSTOP_TYPE_UNKNOWN = 0, CHECKSTOP_TYPE_CORE = 1, CHECKSTOP_TYPE_NX = 2, + CHECKSTOP_TYPE_NPU = 3 }; enum OpalHMI_CoreXstopReason { diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 4ed5d57f2359..57bd029c715e 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -283,8 +283,6 @@ int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio, uint32_t qtoggle, uint32_t qindex); int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01); -int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, - uint64_t desc, uint16_t pe_number); int64_t opal_imc_counters_init(uint32_t type, uint64_t address, uint64_t cpu_pir); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 9bd2326bef6f..e3cc9eb9204d 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -166,7 +166,9 @@ struct paca_struct { u64 kstack; /* Saved Kernel stack addr */ u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */ u64 saved_msr; /* MSR saved here by enter_rtas */ +#ifdef CONFIG_PPC_BOOK3E u16 trap_save; /* Used when bad stack is encountered */ +#endif u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened; /* irq happened while soft-disabled */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 3f53be60fb01..c58ba7963688 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -140,6 +140,44 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p) } #endif +#ifndef pmd_is_leaf +#define pmd_is_leaf pmd_is_leaf +static inline 
bool pmd_is_leaf(pmd_t pmd) +{ + return false; +} +#endif + +#ifndef pud_is_leaf +#define pud_is_leaf pud_is_leaf +static inline bool pud_is_leaf(pud_t pud) +{ + return false; +} +#endif + +#ifndef pgd_is_leaf +#define pgd_is_leaf pgd_is_leaf +static inline bool pgd_is_leaf(pgd_t pgd) +{ + return false; +} +#endif + +#ifdef CONFIG_PPC64 +#define is_ioremap_addr is_ioremap_addr +static inline bool is_ioremap_addr(const void *x) +{ +#ifdef CONFIG_MMU + unsigned long addr = (unsigned long)x; + + return addr >= IOREMAP_BASE && addr < IOREMAP_END; +#else + return false; +#endif +} +#endif /* CONFIG_PPC64 */ + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h index 208b5503f4ed..7de82647e761 100644 --- a/arch/powerpc/include/asm/pnv-ocxl.h +++ b/arch/powerpc/include/asm/pnv-ocxl.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0+ */ // Copyright 2017 IBM Corp. #ifndef _ASM_PNV_OCXL_H #define _ASM_PNV_OCXL_H diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index b5a85f1bb305..edcb1fc50aeb 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -22,15 +22,9 @@ extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state); extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state); extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg); -extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, - u64 desc); -extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind); -extern int pnv_pci_disable_tunnel(struct pci_dev *dev); extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr, int enable); -extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, - u32 *pid, u32 *tid); int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, unsigned int virq); diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h index bc69ed2d952c..e1a858718716 100644 --- a/arch/powerpc/include/asm/powernv.h +++ b/arch/powerpc/include/asm/powernv.h @@ -7,35 +7,13 @@ #define _ASM_POWERNV_H #ifdef CONFIG_PPC_POWERNV -#define NPU2_WRITE 1 extern void powernv_set_nmmu_ptcr(unsigned long ptcr); -extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, - unsigned long flags, - void (*cb)(struct npu_context *, void *), - void *priv); -extern void pnv_npu2_destroy_context(struct npu_context *context, - struct pci_dev *gpdev); -extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, - unsigned long *flags, unsigned long *status, - int count); void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val); void pnv_tm_init(void); #else static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } -static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, - unsigned long flags, - struct npu_context *(*cb)(struct npu_context *, void *), - void *priv) { return ERR_PTR(-ENODEV); } -static inline void pnv_npu2_destroy_context(struct npu_context *context, - struct pci_dev *gpdev) { } - -static inline int pnv_npu2_handle_fault(struct npu_context *context, - uintptr_t *ea, unsigned long *flags, - unsigned long *status, int count) { - return -ENODEV; -} static inline void pnv_tm_init(void) { } #endif diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h index 2291daf39cd1..c1df75edde44 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -410,6 +410,15 @@ #define __PPC_RC21 (0x1 << 10) /* + * Both low and high 16 bits are added as SIGNED additions, so if low 16 bits + * has high bit set, high 16 bits must be adjusted. These macros do that (stolen + * from binutils). + */ +#define PPC_LO(v) ((v) & 0xffff) +#define PPC_HI(v) (((v) >> 16) & 0xffff) +#define PPC_HA(v) PPC_HI((v) + 0x8000) + +/* * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a * larx with EH set as an illegal instruction. */ @@ -588,7 +597,16 @@ #define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \ ((IH & 0x7) << 21)) -#define PPC_INVALIDATE_ERAT PPC_SLBIA(7) + +/* + * These may only be used on ISA v3.0 or later (aka. CPU_FTR_ARCH_300, radix + * implies CPU_FTR_ARCH_300). USER/GUEST invalidates may only be used by radix + * mode (on HPT these would also invalidate various SLBEs which may not be + * desired). + */ +#define PPC_ISA_3_0_INVALIDATE_ERAT PPC_SLBIA(7) +#define PPC_RADIX_INVALIDATE_ERAT_USER PPC_SLBIA(3) +#define PPC_RADIX_INVALIDATE_ERAT_GUEST PPC_SLBIA(6) #define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUD | \ ___PPC_RT(vrt) | ___PPC_RA(vra) | \ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ef573fe9873e..a9993e7a443b 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -346,8 +346,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode) #define spin_cpu_relax() barrier() -#define spin_cpu_yield() spin_cpu_relax() - #define spin_end() HMT_medium() #define spin_until_cond(cond) \ diff --git a/arch/powerpc/include/asm/ps3stor.h b/arch/powerpc/include/asm/ps3stor.h index d9f6589bc107..1d8279014f22 100644 --- a/arch/powerpc/include/asm/ps3stor.h +++ b/arch/powerpc/include/asm/ps3stor.h @@ -39,7 +39,7 @@ struct ps3_storage_device { unsigned int num_regions; unsigned long accessible_regions; unsigned int region_idx; /* first accessible region */ - struct ps3_storage_region regions[0]; /* Must be last */ + struct ps3_storage_region regions[]; /* Must be last */ }; static inline struct ps3_storage_device *to_ps3_storage_device(struct device *dev) diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h index 2d633e9d686c..33fa5dd8ee6a 100644 --- a/arch/powerpc/include/asm/pte-walk.h +++ b/arch/powerpc/include/asm/pte-walk.h @@ -10,8 +10,20 @@ extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *hshift) { + pte_t *pte; + VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); - return __find_linux_pte(pgdir, ea, is_thp, hshift); + pte = __find_linux_pte(pgdir, ea, is_thp, hshift); + +#if defined(CONFIG_DEBUG_VM) && \ + !(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)) + /* + * We should not find huge page if these configs are not enabled. 
+ */ + if (hshift) + WARN_ON(*hshift); +#endif + return pte; } static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) @@ -26,10 +38,22 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift) static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *hshift) { + pte_t *pte; + VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__); VM_WARN(pgdir != current->mm->pgd, "%s lock less page table lookup called on wrong mm\n", __func__); - return __find_linux_pte(pgdir, ea, is_thp, hshift); + pte = __find_linux_pte(pgdir, ea, is_thp, hshift); + +#if defined(CONFIG_DEBUG_VM) && \ + !(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)) + /* + * We should not find huge page if these configs are not enabled. + */ + if (hshift) + WARN_ON(*hshift); +#endif + return pte; } #endif /* _ASM_POWERPC_PTE_WALK_H */ diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index faa5a338ac5a..feee1b21bbd5 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -111,18 +111,33 @@ struct pt_regs #ifndef __ASSEMBLY__ -#define GET_IP(regs) ((regs)->nip) -#define GET_USP(regs) ((regs)->gpr[1]) -#define GET_FP(regs) (0) -#define SET_FP(regs, val) +static inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return regs->nip; +} + +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->nip = val; +} + +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->gpr[1]; +} + +static inline unsigned long frame_pointer(struct pt_regs *regs) +{ + return 0; +} #ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *regs); -#define profile_pc profile_pc +#else +#define profile_pc(regs) instruction_pointer(regs) #endif -#include <asm-generic/ptrace.h> - #define kernel_stack_pointer(regs) ((regs)->gpr[1]) static inline int is_syscall_success(struct pt_regs *regs) { diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f85e2b01c3df..2f7e1ea5089e 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -35,6 +35,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) cpu_all_mask : \ cpumask_of_node(pcibus_to_node(bus))) +extern int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc); extern int __node_distance(int, int); #define node_distance(a, b) __node_distance(a, b) @@ -84,6 +85,11 @@ static inline int numa_update_cpu_topology(bool cpus_locked) static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} +static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) +{ + return 0; +} + #endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 76f34346b642..8b03eb44e876 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -312,6 +312,7 @@ raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { unsigned long ret; + barrier_nospec(); allow_user_access(to, from, n); ret = __copy_tofrom_user(to, from, n); prevent_user_access(to, from, n); diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h index da0b19870570..f93e6b0f5c84 100644 --- a/arch/powerpc/include/asm/vas.h +++ b/arch/powerpc/include/asm/vas.h @@ -163,14 +163,4 @@ int vas_copy_crb(void *crb, int offset); */ 
int vas_paste_crb(struct vas_window *win, int offset, bool re); -/* - * Return a system-wide unique id for the VAS window @win. - */ -extern u32 vas_win_id(struct vas_window *win); - -/* - * Return the power bus paste address associated with @win so the caller - * can map that address into their address space. - */ -extern u64 vas_win_paste_addr(struct vas_window *win); #endif /* __ASM_POWERPC_VAS_H */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0ea6c4aa3a20..56dfa7a2a6f2 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -56,6 +56,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +obj-$(CONFIG_PPC_DAWR) += dawr.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 31dc7e64cbfc..4ccb6b3a7fbd 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -266,7 +266,9 @@ int main(void) OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user); OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); +#ifdef CONFIG_PPC_BOOK3E OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); +#endif OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 9fbb9d12e0c0..470336277c67 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -891,4 +891,25 @@ void cacheinfo_cpu_offline(unsigned int cpu_id) if (cache) cache_cpu_clear(cache, cpu_id); } + +void cacheinfo_teardown(void) +{ + unsigned int cpu; + + lockdep_assert_cpus_held(); + + for_each_online_cpu(cpu) + cacheinfo_cpu_offline(cpu); +} + +void cacheinfo_rebuild(void) +{ + unsigned int cpu; + + lockdep_assert_cpus_held(); + + for_each_online_cpu(cpu) + cacheinfo_cpu_online(cpu); +} + #endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h index 955f5e999f1b..52bd3fc6642d 100644 --- a/arch/powerpc/kernel/cacheinfo.h +++ b/arch/powerpc/kernel/cacheinfo.h @@ -6,4 +6,8 @@ extern void cacheinfo_cpu_online(unsigned int cpu_id); extern void cacheinfo_cpu_offline(unsigned int cpu_id); +/* Allow migration/suspend to tear down and rebuild the hierarchy. */ +extern void cacheinfo_teardown(void); +extern void cacheinfo_rebuild(void); + #endif /* _PPC_CACHEINFO_H */ diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c new file mode 100644 index 000000000000..5f66b95b6858 --- /dev/null +++ b/arch/powerpc/kernel/dawr.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * DAWR infrastructure + * + * Copyright 2019, Michael Neuling, IBM Corporation. 
+ */ + +#include <linux/types.h> +#include <linux/export.h> +#include <linux/fs.h> +#include <linux/debugfs.h> +#include <asm/debugfs.h> +#include <asm/machdep.h> +#include <asm/hvcall.h> + +bool dawr_force_enable; +EXPORT_SYMBOL_GPL(dawr_force_enable); + +int set_dawr(struct arch_hw_breakpoint *brk) +{ + unsigned long dawr, dawrx, mrd; + + dawr = brk->address; + + dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) + << (63 - 58); + dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) << (63 - 59); + dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) >> 3; + /* + * DAWR length is stored in field MRD bits 48:53. Matches range in + * doublewords (64 bits) biased by -1, e.g. 0b000000=1DW and + * 0b111111=64DW. + * brk->len is in bytes. + * This aligns up to double word size, shifts and does the bias. + */ + mrd = ((brk->len + 7) >> 3) - 1; + dawrx |= (mrd & 0x3f) << (63 - 53); + + if (ppc_md.set_dawr) + return ppc_md.set_dawr(dawr, dawrx); + + mtspr(SPRN_DAWR, dawr); + mtspr(SPRN_DAWRX, dawrx); + + return 0; +} + +static void set_dawr_cb(void *info) +{ + set_dawr(info); +} + +static ssize_t dawr_write_file_bool(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct arch_hw_breakpoint null_brk = {0, 0, 0}; + size_t rc; + + /* Send error to user if the hypervisor won't allow us to write DAWR */ + if (!dawr_force_enable && + firmware_has_feature(FW_FEATURE_LPAR) && + set_dawr(&null_brk) != H_SUCCESS) + return -ENODEV; + + rc = debugfs_write_file_bool(file, user_buf, count, ppos); + if (rc) + return rc; + + /* If we are clearing, make sure all CPUs have the DAWR cleared */ + if (!dawr_force_enable) + smp_call_function(set_dawr_cb, &null_brk, 0); + + return rc; +} + +static const struct file_operations dawr_enable_fops = { + .read = debugfs_read_file_bool, + .write = dawr_write_file_bool, + .open = simple_open, + .llseek = default_llseek, +}; + +static int __init dawr_force_setup(void) +{ + if (cpu_has_feature(CPU_FTR_DAWR)) { + /* Don't set up a debugfs file for user control on P8 */ + dawr_force_enable = true; + return 0; + } + + if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { + /* Turn DAWR off by default, but allow admin to turn it on */ + debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, + powerpc_debugfs_root, + &dawr_force_enable, + &dawr_enable_fops); + } + return 0; +} +arch_initcall(dawr_force_setup); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 09231ef06d01..a0879674a9c8 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -71,7 +71,7 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page, return dma_direct_map_page(dev, page, offset, size, direction, attrs); return iommu_map_page(dev, get_iommu_table_base(dev), page, offset, - size, device_to_mask(dev), direction, attrs); + size, dma_get_mask(dev), direction, attrs); } @@ -82,6 +82,8 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle, if (!dma_iommu_map_bypass(dev, attrs)) iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction, attrs); + else + dma_direct_unmap_page(dev, dma_handle, size, direction, attrs); } @@ -92,7 +94,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, if (dma_iommu_map_bypass(dev, attrs)) return dma_direct_map_sg(dev, sglist, nelems, direction, attrs); return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems, - device_to_mask(dev), direction, attrs); + dma_get_mask(dev), direction, attrs); }
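Editor's note on the set_dawr() hunk above: the length encoding is compact enough that a worked example helps. The stand-alone C sketch below (illustrative only; plain userspace C with no kernel headers, and the HW_BRK_TYPE_* bits are omitted) reproduces just the length maths: round the byte length up to doublewords, bias by -1, then place the result in DAWRX bits 48:53, i.e. a left shift by 63 - 53 = 10.

#include <stdio.h>

/*
 * Worked example of the DAWR length (MRD) field computed in set_dawr()
 * above. Only the length maths is modelled here.
 */
static unsigned long dawrx_len_field(unsigned long len_bytes)
{
	/* Align up to doublewords, bias by -1: 0b000000 = 1 DW ... 0b111111 = 64 DW */
	unsigned long mrd = ((len_bytes + 7) >> 3) - 1;

	return (mrd & 0x3f) << (63 - 53);
}

int main(void)
{
	/* 8 bytes -> MRD 0; 9 bytes -> MRD 1 (rounds up to 2 DW); 512 bytes -> MRD 63 */
	printf("%#lx %#lx %#lx\n",
	       dawrx_len_field(8), dawrx_len_field(9), dawrx_len_field(512));
	return 0;
}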
static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, @@ -102,6 +104,8 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist, if (!dma_iommu_map_bypass(dev, attrs)) ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction, attrs); + else + dma_direct_unmap_sg(dev, sglist, nelems, direction, attrs); } static bool dma_iommu_bypass_supported(struct device *dev, u64 mask) @@ -163,6 +167,34 @@ u64 dma_iommu_get_required_mask(struct device *dev) return mask; } +static void dma_iommu_sync_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + if (dma_iommu_alloc_bypass(dev)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); +} + +static void dma_iommu_sync_for_device(struct device *dev, dma_addr_t addr, + size_t sz, enum dma_data_direction dir) +{ + if (dma_iommu_alloc_bypass(dev)) + dma_direct_sync_single_for_device(dev, addr, sz, dir); +} + +extern void dma_iommu_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + if (dma_iommu_alloc_bypass(dev)) + dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir); +} + +extern void dma_iommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + if (dma_iommu_alloc_bypass(dev)) + dma_direct_sync_sg_for_device(dev, sgl, nents, dir); +} + const struct dma_map_ops dma_iommu_ops = { .alloc = dma_iommu_alloc_coherent, .free = dma_iommu_free_coherent, @@ -172,4 +204,8 @@ const struct dma_map_ops dma_iommu_ops = { .map_page = dma_iommu_map_page, .unmap_page = dma_iommu_unmap_page, .get_required_mask = dma_iommu_get_required_mask, + .sync_single_for_cpu = dma_iommu_sync_for_cpu, + .sync_single_for_device = dma_iommu_sync_for_device, + .sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu, + .sync_sg_for_device = dma_iommu_sync_sg_for_device, }; diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f192d57db47d..c0e4b73191f3 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -354,10 +354,19 @@ static inline unsigned long eeh_token_to_phys(unsigned long token) ptep = find_init_mm_pte(token, &hugepage_shift); if (!ptep) return token; - WARN_ON(hugepage_shift); - pa = pte_pfn(*ptep) << PAGE_SHIFT; - return pa | (token & (PAGE_SIZE-1)); + pa = pte_pfn(*ptep); + + /* On radix we can do hugepage mappings for io, so handle that */ + if (hugepage_shift) { + pa <<= hugepage_shift; + pa |= token & ((1ul << hugepage_shift) - 1); + } else { + pa <<= PAGE_SHIFT; + pa |= token & (PAGE_SIZE - 1); + } + + return pa; } /* diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 320472373122..05ffd32b3416 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -18,6 +18,8 @@ /** + * DOC: Overview + * * The pci address cache subsystem. This subsystem places * PCI device address resources into a red-black tree, sorted * according to the address range, so that given only an i/o @@ -34,6 +36,7 @@ * than any hash algo I could think of for this problem, even * with the penalty of slow pointer chases for d-cache misses). 
*/ + struct pci_io_addr_range { struct rb_node rb_node; resource_size_t addr_lo; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6b86055e5251..eee5bef736c8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -21,6 +21,698 @@ #include <asm/feature-fixups.h> #include <asm/kup.h> +/* PACA save area offsets (exgen, exmc, etc) */ +#define EX_R9 0 +#define EX_R10 8 +#define EX_R11 16 +#define EX_R12 24 +#define EX_R13 32 +#define EX_DAR 40 +#define EX_DSISR 48 +#define EX_CCR 52 +#define EX_CFAR 56 +#define EX_PPR 64 +#if defined(CONFIG_RELOCATABLE) +#define EX_CTR 72 +.if EX_SIZE != 10 + .error "EX_SIZE is wrong" +.endif +#else +.if EX_SIZE != 9 + .error "EX_SIZE is wrong" +.endif +#endif + +/* + * We're short on space and time in the exception prolog, so we can't + * use the normal LOAD_REG_IMMEDIATE macro to load the address of label. + * Instead we get the base of the kernel from paca->kernelbase and or in the low + * part of label. This requires that the label be within 64KB of kernelbase, and + * that kernelbase be 64K aligned. + */ +#define LOAD_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); /* get high part of &label */ \ + ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label) + +#define __LOAD_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(label))@l + +/* + * Branches from unrelocated code (e.g., interrupts) to labels outside + * head-y require >64K offsets. + */ +#define __LOAD_FAR_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(label))@l; \ + addis reg,reg,(ABS_ADDR(label))@h + +/* Exception register prefixes */ +#define EXC_HV 1 +#define EXC_STD 0 + +#if defined(CONFIG_RELOCATABLE) +/* + * If we support interrupts with relocation on AND we're a relocatable kernel, + * we need to use CTR to get to the 2nd level handler. So, save/restore it + * when required. + */ +#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13) +#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13) +#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg +#else +/* ...else CTR is unused and in register. 
*/ +#define SAVE_CTR(reg, area) +#define GET_CTR(reg, area) mfctr reg +#define RESTORE_CTR(reg, area) +#endif + +/* + * PPR save/restore macros used in exceptions-64s.S + * Used for P7 or later processors + */ +#define SAVE_PPR(area, ra) \ +BEGIN_FTR_SECTION_NESTED(940) \ + ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \ + std ra,_PPR(r1); \ +END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940) + +#define RESTORE_PPR_PACA(area, ra) \ +BEGIN_FTR_SECTION_NESTED(941) \ + ld ra,area+EX_PPR(r13); \ + mtspr SPRN_PPR,ra; \ +END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941) + +/* + * Get an SPR into a register if the CPU has the given feature + */ +#define OPT_GET_SPR(ra, spr, ftr) \ +BEGIN_FTR_SECTION_NESTED(943) \ + mfspr ra,spr; \ +END_FTR_SECTION_NESTED(ftr,ftr,943) + +/* + * Set an SPR from a register if the CPU has the given feature + */ +#define OPT_SET_SPR(ra, spr, ftr) \ +BEGIN_FTR_SECTION_NESTED(943) \ + mtspr spr,ra; \ +END_FTR_SECTION_NESTED(ftr,ftr,943) + +/* + * Save a register to the PACA if the CPU has the given feature + */ +#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \ +BEGIN_FTR_SECTION_NESTED(943) \ + std ra,offset(r13); \ +END_FTR_SECTION_NESTED(ftr,ftr,943) + +.macro EXCEPTION_PROLOG_0 area + SET_SCRATCH0(r13) /* save r13 */ + GET_PACA(r13) + std r9,\area\()+EX_R9(r13) /* save r9 */ + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR) + HMT_MEDIUM + std r10,\area\()+EX_R10(r13) /* save r10 - r12 */ + OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) +.endm + +.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, dar, dsisr, bitmask + OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR) + OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR) + INTERRUPT_TO_KERNEL + SAVE_CTR(r10, \area\()) + mfcr r9 + .if \kvm + KVMTEST \hsrr \vec + .endif + .if \bitmask + lbz r10,PACAIRQSOFTMASK(r13) + andi. r10,r10,\bitmask + /* Associate vector numbers with bits in paca->irq_happened */ + .if \vec == 0x500 || \vec == 0xea0 + li r10,PACA_IRQ_EE + .elseif \vec == 0x900 + li r10,PACA_IRQ_DEC + .elseif \vec == 0xa00 || \vec == 0xe80 + li r10,PACA_IRQ_DBELL + .elseif \vec == 0xe60 + li r10,PACA_IRQ_HMI + .elseif \vec == 0xf00 + li r10,PACA_IRQ_PMI + .else + .abort "Bad maskable vector" + .endif + + .if \hsrr + bne masked_Hinterrupt + .else + bne masked_interrupt + .endif + .endif + + std r11,\area\()+EX_R11(r13) + std r12,\area\()+EX_R12(r13) + + /* + * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI], + * because a d-side MCE will clobber those registers so is + * not recoverable if they are live. + */ + GET_SCRATCH0(r10) + std r10,\area\()+EX_R13(r13) + .if \dar + mfspr r10,SPRN_DAR + std r10,\area\()+EX_DAR(r13) + .endif + .if \dsisr + mfspr r10,SPRN_DSISR + stw r10,\area\()+EX_DSISR(r13) + .endif +.endm + +.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + .if ! \set_ri + xori r10,r10,MSR_RI /* Clear MSR_RI */ + .endif + .if \hsrr + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + mtspr SPRN_HSRR1,r10 + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + mfspr r12,SPRN_SRR1 /* and SRR1 */ + mtspr SPRN_SRR1,r10 + .endif + LOAD_HANDLER(r10, \label\()) + .if \hsrr + mtspr SPRN_HSRR0,r10 + HRFI_TO_KERNEL + .else + mtspr SPRN_SRR0,r10 + RFI_TO_KERNEL + .endif + b . 
/* prevent speculative execution */ +.endm + +.macro EXCEPTION_PROLOG_2_VIRT label, hsrr +#ifdef CONFIG_RELOCATABLE + .if \hsrr + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + .endif + LOAD_HANDLER(r12, \label\()) + mtctr r12 + .if \hsrr + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + .else + mfspr r12,SPRN_SRR1 /* and SRR1 */ + .endif + li r10,MSR_RI + mtmsrd r10,1 /* Set RI (EE=0) */ + bctr +#else + .if \hsrr + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + mfspr r12,SPRN_SRR1 /* and SRR1 */ + .endif + li r10,MSR_RI + mtmsrd r10,1 /* Set RI (EE=0) */ + b \label +#endif +.endm + +/* + * Branch to label using its 0xC000 address. This results in instruction + * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned + * on using mtmsr rather than rfid. + * + * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than + * load KBASE for a slight optimisation. + */ +#define BRANCH_TO_C000(reg, label) \ + __LOAD_FAR_HANDLER(reg, label); \ + mtctr reg; \ + bctr + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +/* + * If hv is possible, interrupts come into the hv version + * of the kvmppc_interrupt code, which then jumps to the PR handler, + * kvmppc_interrupt_pr, if the guest is a PR guest. + */ +#define kvmppc_interrupt kvmppc_interrupt_hv +#else +#define kvmppc_interrupt kvmppc_interrupt_pr +#endif + +.macro KVMTEST hsrr, n + lbz r10,HSTATE_IN_GUEST(r13) + cmpwi r10,0 + .if \hsrr + bne do_kvm_H\n + .else + bne do_kvm_\n + .endif +.endm + +.macro KVM_HANDLER area, hsrr, n, skip + .if \skip + cmpwi r10,KVM_GUEST_MODE_SKIP + beq 89f + .else +BEGIN_FTR_SECTION_NESTED(947) + ld r10,\area+EX_CFAR(r13) + std r10,HSTATE_CFAR(r13) +END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947) + .endif + +BEGIN_FTR_SECTION_NESTED(948) + ld r10,\area+EX_PPR(r13) + std r10,HSTATE_PPR(r13) +END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) + ld r10,\area+EX_R10(r13) + std r12,HSTATE_SCRATCH0(r13) + sldi r12,r9,32 + /* HSRR variants have the 0x2 bit added to their trap number */ + .if \hsrr + ori r12,r12,(\n + 0x2) + .else + ori r12,r12,(\n) + .endif + +#ifdef CONFIG_RELOCATABLE + /* + * KVM requires __LOAD_FAR_HANDLER because kvmppc_interrupt lives + * outside the head section. CONFIG_RELOCATABLE KVM expects CTR + * to be saved in HSTATE_SCRATCH1. + */ + mfctr r9 + std r9,HSTATE_SCRATCH1(r13) + __LOAD_FAR_HANDLER(r9, kvmppc_interrupt) + mtctr r9 + ld r9,\area+EX_R9(r13) + bctr +#else + ld r9,\area+EX_R9(r13) + b kvmppc_interrupt +#endif + + + .if \skip +89: mtocrf 0x80,r9 + ld r9,\area+EX_R9(r13) + ld r10,\area+EX_R10(r13) + .if \hsrr + b kvmppc_skip_Hinterrupt + .else + b kvmppc_skip_interrupt + .endif + .endif +.endm + +#else +.macro KVMTEST hsrr, n +.endm +.macro KVM_HANDLER area, hsrr, n, skip +.endm +#endif + +#define EXCEPTION_PROLOG_COMMON_1() \ + std r9,_CCR(r1); /* save CR in stackframe */ \ + std r11,_NIP(r1); /* save SRR0 in stackframe */ \ + std r12,_MSR(r1); /* save SRR1 in stackframe */ \ + std r10,0(r1); /* make stack chain pointer */ \ + std r0,GPR0(r1); /* save r0 in stackframe */ \ + std r10,GPR1(r1); /* save r1 in stackframe */ \ + +/* Save original regs values from save area to stack frame. 
*/ +#define EXCEPTION_PROLOG_COMMON_2(area) \ + ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \ + ld r10,area+EX_R10(r13); \ + std r9,GPR9(r1); \ + std r10,GPR10(r1); \ + ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \ + ld r10,area+EX_R12(r13); \ + ld r11,area+EX_R13(r13); \ + std r9,GPR11(r1); \ + std r10,GPR12(r1); \ + std r11,GPR13(r1); \ +BEGIN_FTR_SECTION_NESTED(66); \ + ld r10,area+EX_CFAR(r13); \ + std r10,ORIG_GPR3(r1); \ +END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ + GET_CTR(r10, area); \ + std r10,_CTR(r1); + +#define EXCEPTION_PROLOG_COMMON_3(trap) \ + std r2,GPR2(r1); /* save r2 in stackframe */ \ + SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ + SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ + mflr r9; /* Get LR, later save to stack */ \ + ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ + std r9,_LINK(r1); \ + lbz r10,PACAIRQSOFTMASK(r13); \ + mfspr r11,SPRN_XER; /* save XER in stackframe */ \ + std r10,SOFTE(r1); \ + std r11,_XER(r1); \ + li r9,(trap)+1; \ + std r9,_TRAP(r1); /* set trap number */ \ + li r10,0; \ + ld r11,exception_marker@toc(r2); \ + std r10,RESULT(r1); /* clear regs->result */ \ + std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ + +/* + * On entry r13 points to the paca, r9-r13 are saved in the paca, + * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and + * SRR1, and relocation is on. + */ +#define EXCEPTION_COMMON(area, trap) \ + andi. r10,r12,MSR_PR; /* See if coming from user */ \ + mr r10,r1; /* Save r1 */ \ + subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ + beq- 1f; \ + ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ +1: tdgei r1,-INT_FRAME_SIZE; /* trap if r1 is in userspace */ \ + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; \ +3: EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1, cr0; \ + beq 4f; /* if from kernel mode */ \ + ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ + SAVE_PPR(area, r9); \ +4: EXCEPTION_PROLOG_COMMON_2(area); \ + EXCEPTION_PROLOG_COMMON_3(trap); \ + ACCOUNT_STOLEN_TIME + +/* + * Exception where stack is already set in r1, r1 is saved in r10. + * PPR save and CPU accounting is not done (for some reason). + */ +#define EXCEPTION_COMMON_STACK(area, trap) \ + EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1; \ + EXCEPTION_PROLOG_COMMON_2(area); \ + EXCEPTION_PROLOG_COMMON_3(trap) + +/* + * Restore all registers including H/SRR0/1 saved in a stack frame of a + * standard exception. + */ +.macro EXCEPTION_RESTORE_REGS hsrr + /* Move original SRR0 and SRR1 into the respective regs */ + ld r9,_MSR(r1) + .if \hsrr + mtspr SPRN_HSRR1,r9 + .else + mtspr SPRN_SRR1,r9 + .endif + ld r9,_NIP(r1) + .if \hsrr + mtspr SPRN_HSRR0,r9 + .else + mtspr SPRN_SRR0,r9 + .endif + ld r9,_CTR(r1) + mtctr r9 + ld r9,_XER(r1) + mtxer r9 + ld r9,_LINK(r1) + mtlr r9 + ld r9,_CCR(r1) + mtcr r9 + REST_8GPRS(2, r1) + REST_4GPRS(10, r1) + REST_GPR(0, r1) + /* restore original r1. */ + ld r1,GPR1(r1) +.endm + +#define RUNLATCH_ON \ +BEGIN_FTR_SECTION \ + ld r3, PACA_THREAD_INFO(r13); \ + ld r4,TI_LOCAL_FLAGS(r3); \ + andi. r0,r4,_TLF_RUNLATCH; \ + beql ppc64_runlatch_on_trampoline; \ +END_FTR_SECTION_IFSET(CPU_FTR_CTRL) + +/* + * When the idle code in power4_idle puts the CPU into NAP mode, + * it has to do so in a loop, and relies on the external interrupt + * and decrementer interrupt entry code to get it out of the loop. 
+ * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags + * to signal that it is in the loop and needs help to get out. + */ +#ifdef CONFIG_PPC_970_NAP +#define FINISH_NAP \ +BEGIN_FTR_SECTION \ + ld r11, PACA_THREAD_INFO(r13); \ + ld r9,TI_LOCAL_FLAGS(r11); \ + andi. r10,r9,_TLF_NAPPING; \ + bnel power4_fixup_nap; \ +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) +#else +#define FINISH_NAP +#endif + +/* + * Following are the BOOK3S exception handler helper macros. + * Handlers come in a number of types, and each type has a number of varieties. + * + * EXC_REAL_* - real, unrelocated exception vectors + * EXC_VIRT_* - virt (AIL), unrelocated exception vectors + * TRAMP_REAL_* - real, unrelocated helpers (virt can call these) + * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use) + * TRAMP_KVM - KVM handlers that get put into real, unrelocated + * EXC_COMMON - virt, relocated common handlers + * + * The EXC handlers are given a name, and branch to name_common, or the + * appropriate KVM or masking function. Vector handler varieties are as + * follows: + * + * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception + * + * EXC_{REAL|VIRT} - standard exception + * + * EXC_{REAL|VIRT}_suffix + * where _suffix is: + * - _MASKABLE - maskable exception + * - _OOL - out of line with trampoline to common handler + * - _HV - HV exception + * + * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV + * + * KVM handlers come in the following varieties: + * TRAMP_KVM + * TRAMP_KVM_SKIP + * TRAMP_KVM_HV + * TRAMP_KVM_HV_SKIP + * + * COMMON handlers come in the following varieties: + * EXC_COMMON_BEGIN/END - used to open-code the handler + * EXC_COMMON + * EXC_COMMON_ASYNC + * + * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM + * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers. 
+ */ + +#define __EXC_REAL(name, start, size, area) \ + EXC_REAL_BEGIN(name, start, size); \ + EXCEPTION_PROLOG_0 area ; \ + EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0, 0, 0 ; \ + EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \ + EXC_REAL_END(name, start, size) + +#define EXC_REAL(name, start, size) \ + __EXC_REAL(name, start, size, PACA_EXGEN) + +#define __EXC_VIRT(name, start, size, realvec, area) \ + EXC_VIRT_BEGIN(name, start, size); \ + EXCEPTION_PROLOG_0 area ; \ + EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0, 0, 0; \ + EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ; \ + EXC_VIRT_END(name, start, size) + +#define EXC_VIRT(name, start, size, realvec) \ + __EXC_VIRT(name, start, size, realvec, PACA_EXGEN) + +#define EXC_REAL_MASKABLE(name, start, size, bitmask) \ + EXC_REAL_BEGIN(name, start, size); \ + EXCEPTION_PROLOG_0 PACA_EXGEN ; \ + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, 0, 0, bitmask ; \ + EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \ + EXC_REAL_END(name, start, size) + +#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \ + EXC_VIRT_BEGIN(name, start, size); \ + EXCEPTION_PROLOG_0 PACA_EXGEN ; \ + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \ + EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ; \ + EXC_VIRT_END(name, start, size) + +#define EXC_REAL_HV(name, start, size) \ + EXC_REAL_BEGIN(name, start, size); \ + EXCEPTION_PROLOG_0 PACA_EXGEN; \ + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, start, 0, 0, 0 ; \ + EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1 ; \ + EXC_REAL_END(name, start, size) + +#define EXC_VIRT_HV(name, start, size, realvec) \ + EXC_VIRT_BEGIN(name, start, size); \ + EXCEPTION_PROLOG_0 PACA_EXGEN; \ + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ; \ + EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV ; \ + EXC_VIRT_END(name, start, size) + +#define __EXC_REAL_OOL(name, start, size) \ + EXC_REAL_BEGIN(name, start, size); \ + EXCEPTION_PROLOG_0 PACA_EXGEN ; \ + b tramp_real_##name ; \ + EXC_REAL_END(name, start, size) + +#define __TRAMP_REAL_OOL(name, vec) \ + TRAMP_REAL_BEGIN(tramp_real_##name); \ + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0, 0, 0 ; \ + EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 + +#define EXC_REAL_OOL(name, start, size) \ + __EXC_REAL_OOL(name, start, size); \ + __TRAMP_REAL_OOL(name, start) + +#define __EXC_REAL_OOL_MASKABLE(name, start, size) \ + __EXC_REAL_OOL(name, start, size) + +#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \ + TRAMP_REAL_BEGIN(tramp_real_##name); \ + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0, 0, bitmask ; \ + EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 + +#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \ + __EXC_REAL_OOL_MASKABLE(name, start, size); \ + __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask) + +#define __EXC_REAL_OOL_HV(name, start, size) \ + __EXC_REAL_OOL(name, start, size) + +#define __TRAMP_REAL_OOL_HV(name, vec) \ + TRAMP_REAL_BEGIN(tramp_real_##name); \ + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0, 0, 0 ; \ + EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1 + +#define EXC_REAL_OOL_HV(name, start, size) \ + __EXC_REAL_OOL_HV(name, start, size); \ + __TRAMP_REAL_OOL_HV(name, start) + +#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \ + __EXC_REAL_OOL(name, start, size) + +#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \ + TRAMP_REAL_BEGIN(tramp_real_##name); \ + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0, 0, bitmask ; \ + EXCEPTION_PROLOG_2_REAL 
name##_common, EXC_HV, 1 + +#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \ + __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \ + __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask) + +#define __EXC_VIRT_OOL(name, start, size) \ + EXC_VIRT_BEGIN(name, start, size); \ + EXCEPTION_PROLOG_0 PACA_EXGEN ; \ + b tramp_virt_##name; \ + EXC_VIRT_END(name, start, size) + +#define __TRAMP_VIRT_OOL(name, realvec) \ + TRAMP_VIRT_BEGIN(tramp_virt_##name); \ + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, vec, 0, 0, 0 ; \ + EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD + +#define EXC_VIRT_OOL(name, start, size, realvec) \ + __EXC_VIRT_OOL(name, start, size); \ + __TRAMP_VIRT_OOL(name, realvec) + +#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \ + __EXC_VIRT_OOL(name, start, size) + +#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \ + TRAMP_VIRT_BEGIN(tramp_virt_##name); \ + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \ + EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 + +#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \ + __EXC_VIRT_OOL_MASKABLE(name, start, size); \ + __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) + +#define __EXC_VIRT_OOL_HV(name, start, size) \ + __EXC_VIRT_OOL(name, start, size) + +#define __TRAMP_VIRT_OOL_HV(name, realvec) \ + TRAMP_VIRT_BEGIN(tramp_virt_##name); \ + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ; \ + EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV + +#define EXC_VIRT_OOL_HV(name, start, size, realvec) \ + __EXC_VIRT_OOL_HV(name, start, size); \ + __TRAMP_VIRT_OOL_HV(name, realvec) + +#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \ + __EXC_VIRT_OOL(name, start, size) + +#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \ + TRAMP_VIRT_BEGIN(tramp_virt_##name); \ + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, bitmask ; \ + EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV + +#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \ + __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \ + __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) + +#define TRAMP_KVM(area, n) \ + TRAMP_KVM_BEGIN(do_kvm_##n); \ + KVM_HANDLER area, EXC_STD, n, 0 + +#define TRAMP_KVM_SKIP(area, n) \ + TRAMP_KVM_BEGIN(do_kvm_##n); \ + KVM_HANDLER area, EXC_STD, n, 1 + +#define TRAMP_KVM_HV(area, n) \ + TRAMP_KVM_BEGIN(do_kvm_H##n); \ + KVM_HANDLER area, EXC_HV, n, 0 + +#define TRAMP_KVM_HV_SKIP(area, n) \ + TRAMP_KVM_BEGIN(do_kvm_H##n); \ + KVM_HANDLER area, EXC_HV, n, 1 + +#define EXC_COMMON(name, realvec, hdlr) \ + EXC_COMMON_BEGIN(name); \ + EXCEPTION_COMMON(PACA_EXGEN, realvec); \ + bl save_nvgprs; \ + RECONCILE_IRQ_STATE(r10, r11); \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b ret_from_except + +/* + * Like EXC_COMMON, but for exceptions that can occur in the idle task and + * therefore need the special idle handling (finish nap and runlatch) + */ +#define EXC_COMMON_ASYNC(name, realvec, hdlr) \ + EXC_COMMON_BEGIN(name); \ + EXCEPTION_COMMON(PACA_EXGEN, realvec); \ + FINISH_NAP; \ + RECONCILE_IRQ_STATE(r10, r11); \ + RUNLATCH_ON; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b ret_from_except_lite + + /* * There are a few constraints to be concerned with. * - Real mode exceptions code/data must be located at their physical location. 
@@ -107,6 +799,7 @@ __start_interrupts: EXC_VIRT_NONE(0x4000, 0x100) +EXC_REAL_BEGIN(system_reset, 0x100, 0x100) #ifdef CONFIG_PPC_P7_NAP /* * If running native on arch 2.06 or later, check if we are waking up @@ -114,60 +807,72 @@ EXC_VIRT_NONE(0x4000, 0x100) * bits 46:47. A non-0 value indicates that we are coming from a power * saving state. The idle wakeup handler initially runs in real mode, * but we branch to the 0xc000... address so we can turn on relocation - * with mtmsr. + * with mtmsrd later, after SPRs are restored. + * + * Careful to minimise cost for the fast path (idle wakeup) while + * also avoiding clobbering CFAR for the debug path (non-idle). + * + * For the idle wake case volatile registers can be clobbered, which + * is why we use those initially. If it turns out to not be an idle + * wake, carefully put everything back the way it was, so we can use + * common exception macros to handle it. */ -#define IDLETEST(n) \ - BEGIN_FTR_SECTION ; \ - mfspr r10,SPRN_SRR1 ; \ - rlwinm. r10,r10,47-31,30,31 ; \ - beq- 1f ; \ - cmpwi cr1,r10,2 ; \ - mfspr r3,SPRN_SRR1 ; \ - bltlr cr1 ; /* no state loss, return to idle caller */ \ - BRANCH_TO_C000(r10, system_reset_idle_common) ; \ -1: \ - KVMTEST_PR(n) ; \ - END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#else -#define IDLETEST NOTEST +BEGIN_FTR_SECTION + SET_SCRATCH0(r13) + GET_PACA(r13) + std r3,PACA_EXNMI+0*8(r13) + std r4,PACA_EXNMI+1*8(r13) + std r5,PACA_EXNMI+2*8(r13) + mfspr r3,SPRN_SRR1 + mfocrf r4,0x80 + rlwinm. r5,r3,47-31,30,31 + bne+ system_reset_idle_wake + /* Not powersave wakeup. Restore regs for regular interrupt handler. */ + mtocrf 0x80,r4 + ld r3,PACA_EXNMI+0*8(r13) + ld r4,PACA_EXNMI+1*8(r13) + ld r5,PACA_EXNMI+2*8(r13) + GET_SCRATCH0(r13) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif -EXC_REAL_BEGIN(system_reset, 0x100, 0x100) - SET_SCRATCH0(r13) + EXCEPTION_PROLOG_0 PACA_EXNMI + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 1, 0x100, 0, 0, 0 + EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0 /* * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is * being used, so a nested NMI exception would corrupt it. + * + * In theory, we should not enable relocation here if it was disabled + * in SRR1, because the MMU may not be configured to support it (e.g., + * SLB may have been cleared). In practice, there should only be a few + * small windows where that's the case, and sreset is considered to + * be dangerous anyway. */ - EXCEPTION_PROLOG_NORI(PACA_EXNMI, system_reset_common, EXC_STD, - IDLETEST, 0x100) - EXC_REAL_END(system_reset, 0x100, 0x100) + EXC_VIRT_NONE(0x4100, 0x100) TRAMP_KVM(PACA_EXNMI, 0x100) #ifdef CONFIG_PPC_P7_NAP -EXC_COMMON_BEGIN(system_reset_idle_common) - /* - * This must be a direct branch (without linker branch stub) because - * we can not use TOC at this point as r2 may not be restored yet. - */ - b idle_return_gpr_loss +TRAMP_REAL_BEGIN(system_reset_idle_wake) + /* We are waking up from idle, so may clobber any volatile register */ + cmpwi cr1,r5,2 + bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ + BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss)) #endif +#ifdef CONFIG_PPC_PSERIES /* - * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does - * the right thing. We do not want to reconcile because that goes - * through irq tracing which we don't want in NMI. - * - * Save PACAIRQHAPPENED because some code will do a hard disable - * (e.g., xmon). So we want to restore this back to where it was - * when we return. 
DAR is unused in the stack, so save it there. + * Vectors for the FWNMI option. Share common code. */ -#define ADD_RECONCILE_NMI \ - li r10,IRQS_ALL_DISABLED; \ - stb r10,PACAIRQSOFTMASK(r13); \ - lbz r10,PACAIRQHAPPENED(r13); \ - std r10,_DAR(r1) +TRAMP_REAL_BEGIN(system_reset_fwnmi) + /* See comment at system_reset exception, don't turn on RI */ + EXCEPTION_PROLOG_0 PACA_EXNMI + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 0, 0x100, 0, 0, 0 + EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0 + +#endif /* CONFIG_PPC_PSERIES */ EXC_COMMON_BEGIN(system_reset_common) /* @@ -185,15 +890,27 @@ EXC_COMMON_BEGIN(system_reset_common) mr r10,r1 ld r1,PACA_NMI_EMERG_SP(r13) subi r1,r1,INT_FRAME_SIZE - EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100, - system_reset, system_reset_exception, - ADD_NVGPRS;ADD_RECONCILE_NMI) + EXCEPTION_COMMON_STACK(PACA_EXNMI, 0x100) + bl save_nvgprs + /* + * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does + * the right thing. We do not want to reconcile because that goes + * through irq tracing which we don't want in NMI. + * + * Save PACAIRQHAPPENED because some code will do a hard disable + * (e.g., xmon). So we want to restore this back to where it was + * when we return. DAR is unused in the stack, so save it there. + */ + li r10,IRQS_ALL_DISABLED + stb r10,PACAIRQSOFTMASK(r13) + lbz r10,PACAIRQHAPPENED(r13) + std r10,_DAR(r1) + + addi r3,r1,STACK_FRAME_OVERHEAD + bl system_reset_exception - /* This (and MCE) can be simplified with mtmsrd L=1 */ /* Clear MSR_RI before setting SRR0 and SRR1. */ - li r0,MSR_RI - mfmsr r9 - andc r9,r9,r0 + li r9,0 mtmsrd r9,1 /* @@ -211,52 +928,16 @@ EXC_COMMON_BEGIN(system_reset_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - /* - * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP. - * Should share common bits... - */ - - /* Move original SRR0 and SRR1 into the respective regs */ - ld r9,_MSR(r1) - mtspr SPRN_SRR1,r9 - ld r3,_NIP(r1) - mtspr SPRN_SRR0,r3 - ld r9,_CTR(r1) - mtctr r9 - ld r9,_XER(r1) - mtxer r9 - ld r9,_LINK(r1) - mtlr r9 - REST_GPR(0, r1) - REST_8GPRS(2, r1) - REST_GPR(10, r1) - ld r11,_CCR(r1) - mtcr r11 - REST_GPR(11, r1) - REST_2GPRS(12, r1) - /* restore original r1. */ - ld r1,GPR1(r1) + EXCEPTION_RESTORE_REGS EXC_STD RFI_TO_USER_OR_KERNEL -#ifdef CONFIG_PPC_PSERIES -/* - * Vectors for the FWNMI option. Share common code. 
- */ -TRAMP_REAL_BEGIN(system_reset_fwnmi) - SET_SCRATCH0(r13) /* save r13 */ - /* See comment at system_reset exception */ - EXCEPTION_PROLOG_NORI(PACA_EXNMI, system_reset_common, EXC_STD, - NOTEST, 0x100) -#endif /* CONFIG_PPC_PSERIES */ - EXC_REAL_BEGIN(machine_check, 0x200, 0x100) /* This is moved out of line as it can be patched by FW, but * some code path might still want to branch into the original * vector */ - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_0(PACA_EXMC) + EXCEPTION_PROLOG_0 PACA_EXMC BEGIN_FTR_SECTION b machine_check_common_early FTR_SECTION_ELSE @@ -265,7 +946,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_REAL_END(machine_check, 0x200, 0x100) EXC_VIRT_NONE(0x4200, 0x100) TRAMP_REAL_BEGIN(machine_check_common_early) - EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 0, 0x200, 0, 0, 0 /* * Register contents: * R13 = PACA @@ -315,7 +996,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ - kuap_save_amr_and_lock r9, r10, cr1 + /* We don't touch AMR here, we never go to virtual mode */ /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ @@ -344,19 +1025,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) TRAMP_REAL_BEGIN(machine_check_pSeries) .globl machine_check_fwnmi machine_check_fwnmi: - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_0(PACA_EXMC) + EXCEPTION_PROLOG_0 PACA_EXMC BEGIN_FTR_SECTION b machine_check_common_early END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) machine_check_pSeries_0: - EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200) + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0 /* * MSR_RI is not enabled, because PACA_EXMC is being used, so a * nested machine check corrupts it. machine_check_common enables * MSR_RI. */ - EXCEPTION_PROLOG_2_NORI(machine_check_common, EXC_STD) + EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0 TRAMP_KVM_SKIP(PACA_EXMC, 0x200) @@ -365,11 +1045,7 @@ EXC_COMMON_BEGIN(machine_check_common) * Machine check is different because we use a different * save area: PACA_EXMC instead of PACA_EXGEN. */ - mfspr r10,SPRN_DAR - std r10,PACA_EXMC+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXMC+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) + EXCEPTION_COMMON(PACA_EXMC, 0x200) FINISH_NAP RECONCILE_IRQ_STATE(r10, r11) ld r3,PACA_EXMC+EX_DAR(r13) @@ -386,34 +1062,13 @@ EXC_COMMON_BEGIN(machine_check_common) #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. */\ - li r0,MSR_RI; \ - mfmsr r9; /* get MSR value */ \ - andc r9,r9,r0; \ + li r9,0; \ mtmsrd r9,1; /* Clear MSR_RI */ \ - /* Move original SRR0 and SRR1 into the respective regs */ \ - ld r9,_MSR(r1); \ - mtspr SPRN_SRR1,r9; \ - ld r3,_NIP(r1); \ - mtspr SPRN_SRR0,r3; \ - ld r9,_CTR(r1); \ - mtctr r9; \ - ld r9,_XER(r1); \ - mtxer r9; \ - ld r9,_LINK(r1); \ - mtlr r9; \ - REST_GPR(0, r1); \ - REST_8GPRS(2, r1); \ - REST_GPR(10, r1); \ - ld r11,_CCR(r1); \ - mtcr r11; \ - /* Decrement paca->in_mce. */ \ + /* Decrement paca->in_mce now RI is clear. */ \ lhz r12,PACA_IN_MCE(r13); \ subi r12,r12,1; \ sth r12,PACA_IN_MCE(r13); \ - REST_GPR(11, r1); \ - REST_2GPRS(12, r1); \ - /* restore original r1. */ \ - ld r1,GPR1(r1) + EXCEPTION_RESTORE_REGS EXC_STD #ifdef CONFIG_PPC_P7_NAP /* @@ -472,10 +1127,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) * * Go back to nap/sleep/winkle mode again if (b) is true. 
*/ - BEGIN_FTR_SECTION +BEGIN_FTR_SECTION rlwinm. r11,r12,47-31,30,31 bne machine_check_idle_common - END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* @@ -557,8 +1212,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) 9: /* Deliver the machine check to host kernel in V mode. */ MACHINE_CHECK_HANDLER_WINDUP - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_0(PACA_EXMC) + EXCEPTION_PROLOG_0 PACA_EXMC b machine_check_pSeries_0 EXC_COMMON_BEGIN(unrecover_mce) @@ -582,33 +1236,18 @@ EXC_COMMON_BEGIN(mce_return) b . EXC_REAL_BEGIN(data_access, 0x300, 0x80) -SET_SCRATCH0(r13) /* save r13 */ -EXCEPTION_PROLOG_0(PACA_EXGEN) + EXCEPTION_PROLOG_0 PACA_EXGEN b tramp_real_data_access EXC_REAL_END(data_access, 0x300, 0x80) TRAMP_REAL_BEGIN(tramp_real_data_access) -EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, 0x300) - /* - * DAR/DSISR must be read before setting MSR[RI], because - * a d-side MCE will clobber those registers so is not - * recoverable if they are live. - */ - mfspr r10,SPRN_DAR - mfspr r11,SPRN_DSISR - std r10,PACA_EXGEN+EX_DAR(r13) - stw r11,PACA_EXGEN+EX_DSISR(r13) -EXCEPTION_PROLOG_2(data_access_common, EXC_STD) + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 1, 1, 0 + EXCEPTION_PROLOG_2_REAL data_access_common, EXC_STD, 1 EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) -SET_SCRATCH0(r13) /* save r13 */ -EXCEPTION_PROLOG_0(PACA_EXGEN) -EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x300) - mfspr r10,SPRN_DAR - mfspr r11,SPRN_DSISR - std r10,PACA_EXGEN+EX_DAR(r13) - stw r11,PACA_EXGEN+EX_DSISR(r13) -EXCEPTION_PROLOG_2_RELON(data_access_common, EXC_STD) + EXCEPTION_PROLOG_0 PACA_EXGEN + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x300, 1, 1, 0 +EXCEPTION_PROLOG_2_VIRT data_access_common, EXC_STD EXC_VIRT_END(data_access, 0x4300, 0x80) TRAMP_KVM_SKIP(PACA_EXGEN, 0x300) @@ -620,7 +1259,7 @@ EXC_COMMON_BEGIN(data_access_common) * r9 - r13 are saved in paca->exgen. 
* EX_DAR and EX_DSISR have saved DAR/DSISR */ - EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) + EXCEPTION_COMMON(PACA_EXGEN, 0x300) RECONCILE_IRQ_STATE(r10, r11) ld r12,_MSR(r1) ld r3,PACA_EXGEN+EX_DAR(r13) @@ -636,30 +1275,24 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) -SET_SCRATCH0(r13) /* save r13 */ -EXCEPTION_PROLOG_0(PACA_EXSLB) + EXCEPTION_PROLOG_0 PACA_EXSLB b tramp_real_data_access_slb EXC_REAL_END(data_access_slb, 0x380, 0x80) TRAMP_REAL_BEGIN(tramp_real_data_access_slb) -EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380) - mfspr r10,SPRN_DAR - std r10,PACA_EXSLB+EX_DAR(r13) -EXCEPTION_PROLOG_2(data_access_slb_common, EXC_STD) + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSLB, 1, 0x380, 1, 0, 0 + EXCEPTION_PROLOG_2_REAL data_access_slb_common, EXC_STD, 1 EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) -SET_SCRATCH0(r13) /* save r13 */ -EXCEPTION_PROLOG_0(PACA_EXSLB) -EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380) - mfspr r10,SPRN_DAR - std r10,PACA_EXSLB+EX_DAR(r13) -EXCEPTION_PROLOG_2_RELON(data_access_slb_common, EXC_STD) + EXCEPTION_PROLOG_0 PACA_EXSLB + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSLB, 0, 0x380, 1, 0, 0 + EXCEPTION_PROLOG_2_VIRT data_access_slb_common, EXC_STD EXC_VIRT_END(data_access_slb, 0x4380, 0x80) TRAMP_KVM_SKIP(PACA_EXSLB, 0x380) EXC_COMMON_BEGIN(data_access_slb_common) - EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB) + EXCEPTION_COMMON(PACA_EXSLB, 0x380) ld r4,PACA_EXSLB+EX_DAR(r13) std r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD @@ -689,7 +1322,7 @@ EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400) TRAMP_KVM(PACA_EXGEN, 0x400) EXC_COMMON_BEGIN(instruction_access_common) - EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) + EXCEPTION_COMMON(PACA_EXGEN, 0x400) RECONCILE_IRQ_STATE(r10, r11) ld r12,_MSR(r1) ld r3,_NIP(r1) @@ -704,18 +1337,12 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) -EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80) -EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480); -EXC_REAL_END(instruction_access_slb, 0x480, 0x80) - -EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) -EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480); -EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) - +__EXC_REAL(instruction_access_slb, 0x480, 0x80, PACA_EXSLB) +__EXC_VIRT(instruction_access_slb, 0x4480, 0x80, 0x480, PACA_EXSLB) TRAMP_KVM(PACA_EXSLB, 0x480) EXC_COMMON_BEGIN(instruction_access_slb_common) - EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB) + EXCEPTION_COMMON(PACA_EXSLB, 0x480) ld r4,_NIP(r1) addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION @@ -740,25 +1367,25 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) - .globl hardware_interrupt_hv; -hardware_interrupt_hv: - BEGIN_FTR_SECTION - MASKABLE_EXCEPTION_HV(0x500, hardware_interrupt_common, IRQS_DISABLED) - FTR_SECTION_ELSE - MASKABLE_EXCEPTION(0x500, hardware_interrupt_common, IRQS_DISABLED) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + EXCEPTION_PROLOG_0 PACA_EXGEN +BEGIN_FTR_SECTION + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED + EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_HV, 1 +FTR_SECTION_ELSE + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED + EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_STD, 1 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) EXC_REAL_END(hardware_interrupt, 0x500, 0x100) 
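/*
 * Note: the argument order of the new EXCEPTION_PROLOG_1 macro, as read off
 * the call sites in this file (the macro definitions themselves are not in
 * this hunk), appears to be:
 *
 *	.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, dar, dsisr, bitmask
 *
 * so e.g. "EXC_STD, PACA_EXGEN, 1, 0x300, 1, 1, 0" reads: use SRR0/SRR1, use
 * the PACA_EXGEN save area, run the KVM test, vector 0x300, save DAR and
 * DSISR, no soft-mask bit.  The trailing 0/1 on EXCEPTION_PROLOG_2_REAL
 * likewise appears to say whether MSR[RI] may be turned on (kept off for
 * system reset and machine check, whose save areas do not survive a nested
 * exception).
 */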
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) - .globl hardware_interrupt_relon_hv; -hardware_interrupt_relon_hv: - BEGIN_FTR_SECTION - MASKABLE_RELON_EXCEPTION_HV(0x500, hardware_interrupt_common, - IRQS_DISABLED) - FTR_SECTION_ELSE - __MASKABLE_RELON_EXCEPTION(0x500, hardware_interrupt_common, - EXC_STD, SOFTEN_TEST_PR, IRQS_DISABLED) - ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) + EXCEPTION_PROLOG_0 PACA_EXGEN +BEGIN_FTR_SECTION + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED + EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_HV +FTR_SECTION_ELSE + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED + EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_STD +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) TRAMP_KVM(PACA_EXGEN, 0x500) @@ -767,30 +1394,20 @@ EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) EXC_REAL_BEGIN(alignment, 0x600, 0x100) -SET_SCRATCH0(r13) /* save r13 */ -EXCEPTION_PROLOG_0(PACA_EXGEN) -EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, 0x600) - mfspr r10,SPRN_DAR - mfspr r11,SPRN_DSISR - std r10,PACA_EXGEN+EX_DAR(r13) - stw r11,PACA_EXGEN+EX_DSISR(r13) -EXCEPTION_PROLOG_2(alignment_common, EXC_STD) + EXCEPTION_PROLOG_0 PACA_EXGEN + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x600, 1, 1, 0 + EXCEPTION_PROLOG_2_REAL alignment_common, EXC_STD, 1 EXC_REAL_END(alignment, 0x600, 0x100) EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) -SET_SCRATCH0(r13) /* save r13 */ -EXCEPTION_PROLOG_0(PACA_EXGEN) -EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x600) - mfspr r10,SPRN_DAR - mfspr r11,SPRN_DSISR - std r10,PACA_EXGEN+EX_DAR(r13) - stw r11,PACA_EXGEN+EX_DSISR(r13) -EXCEPTION_PROLOG_2_RELON(alignment_common, EXC_STD) + EXCEPTION_PROLOG_0 PACA_EXGEN + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x600, 1, 1, 0 + EXCEPTION_PROLOG_2_VIRT alignment_common, EXC_STD EXC_VIRT_END(alignment, 0x4600, 0x100) TRAMP_KVM(PACA_EXGEN, 0x600) EXC_COMMON_BEGIN(alignment_common) - EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN) + EXCEPTION_COMMON(PACA_EXGEN, 0x600) ld r3,PACA_EXGEN+EX_DAR(r13) lwz r4,PACA_EXGEN+EX_DSISR(r13) std r3,_DAR(r1) @@ -814,21 +1431,25 @@ EXC_COMMON_BEGIN(program_check_common) * we switch to the emergency stack if we're taking a TM Bad Thing from * the kernel. */ - li r10,MSR_PR /* Build a mask of MSR_PR .. */ - oris r10,r10,0x200000@h /* .. and SRR1_PROGTM */ - and r10,r10,r12 /* Mask SRR1 with that. */ - srdi r10,r10,8 /* Shift it so we can compare */ - cmpldi r10,(0x200000 >> 8) /* .. with an immediate. */ - bne 1f /* If != go to normal path. */ - - /* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack */ - andi. r10,r12,MSR_PR; /* Set CR0 correctly for label */ + + andi. r10,r12,MSR_PR + bne 2f /* If userspace, go normal path */ + + andis. r10,r12,(SRR1_PROGTM)@h + bne 1f /* If TM, emergency */ + + cmpdi r1,-INT_FRAME_SIZE /* check if r1 is in userspace */ + blt 2f /* normal path if not */ + + /* Use the emergency stack */ +1: andi. r10,r12,MSR_PR /* Set CR0 correctly for label */ /* 3 in EXCEPTION_PROLOG_COMMON */ mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ b 3f /* Jump into the macro !! 
*/ -1: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) +2: + EXCEPTION_COMMON(PACA_EXGEN, 0x700) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD @@ -840,7 +1461,7 @@ EXC_REAL(fp_unavailable, 0x800, 0x100) EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800) TRAMP_KVM(PACA_EXGEN, 0x800) EXC_COMMON_BEGIN(fp_unavailable_common) - EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) + EXCEPTION_COMMON(PACA_EXGEN, 0x800) bne 1f /* if from user, just load it up */ bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) @@ -932,6 +1553,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) * without saving, though xer is not a good idea to use, as hardware may * interpret some bits so it may be costly to change them. */ +.macro SYSTEM_CALL virt #ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* * There is a little bit of juggling to get syscall and hcall @@ -941,95 +1563,67 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) * Userspace syscalls have already saved the PPR, hcalls must save * it before setting HMT_MEDIUM. */ -#define SYSCALL_KVMTEST \ - mtctr r13; \ - GET_PACA(r13); \ - std r10,PACA_EXGEN+EX_R10(r13); \ - INTERRUPT_TO_KERNEL; \ - KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \ - HMT_MEDIUM; \ - mfctr r9; - + mtctr r13 + GET_PACA(r13) + std r10,PACA_EXGEN+EX_R10(r13) + INTERRUPT_TO_KERNEL + KVMTEST EXC_STD 0xc00 /* uses r10, branch to do_kvm_0xc00_system_call */ + mfctr r9 #else -#define SYSCALL_KVMTEST \ - HMT_MEDIUM; \ - mr r9,r13; \ - GET_PACA(r13); \ - INTERRUPT_TO_KERNEL; + mr r9,r13 + GET_PACA(r13) + INTERRUPT_TO_KERNEL #endif - -#define LOAD_SYSCALL_HANDLER(reg) \ - __LOAD_HANDLER(reg, system_call_common) - -/* - * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9, - * and HMT_MEDIUM. - */ -#define SYSCALL_REAL \ - mfspr r11,SPRN_SRR0 ; \ - mfspr r12,SPRN_SRR1 ; \ - LOAD_SYSCALL_HANDLER(r10) ; \ - mtspr SPRN_SRR0,r10 ; \ - ld r10,PACAKMSR(r13) ; \ - mtspr SPRN_SRR1,r10 ; \ - RFI_TO_KERNEL ; \ - b . ; /* prevent speculative execution */ #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH -#define SYSCALL_FASTENDIAN_TEST \ -BEGIN_FTR_SECTION \ - cmpdi r0,0x1ebe ; \ - beq- 1f ; \ -END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ - -#define SYSCALL_FASTENDIAN \ - /* Fast LE/BE switch system call */ \ -1: mfspr r12,SPRN_SRR1 ; \ - xori r12,r12,MSR_LE ; \ - mtspr SPRN_SRR1,r12 ; \ - mr r13,r9 ; \ - RFI_TO_USER ; /* return to userspace */ \ - b . ; /* prevent speculative execution */ -#else -#define SYSCALL_FASTENDIAN_TEST -#define SYSCALL_FASTENDIAN -#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */ +BEGIN_FTR_SECTION + cmpdi r0,0x1ebe + beq- 1f +END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) +#endif -#if defined(CONFIG_RELOCATABLE) - /* - * We can't branch directly so we do it via the CTR which - * is volatile across system calls. - */ -#define SYSCALL_VIRT \ - LOAD_SYSCALL_HANDLER(r10) ; \ - mtctr r10 ; \ - mfspr r11,SPRN_SRR0 ; \ - mfspr r12,SPRN_SRR1 ; \ - li r10,MSR_RI ; \ - mtmsrd r10,1 ; \ - bctr ; + /* We reach here with PACA in r13, r13 in r9. */ + mfspr r11,SPRN_SRR0 + mfspr r12,SPRN_SRR1 + + HMT_MEDIUM + + .if ! \virt + __LOAD_HANDLER(r10, system_call_common) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + RFI_TO_KERNEL + b . 
/* prevent speculative execution */ + .else + li r10,MSR_RI + mtmsrd r10,1 /* Set RI (EE=0) */ +#ifdef CONFIG_RELOCATABLE + __LOAD_HANDLER(r10, system_call_common) + mtctr r10 + bctr #else - /* We can branch directly */ -#define SYSCALL_VIRT \ - mfspr r11,SPRN_SRR0 ; \ - mfspr r12,SPRN_SRR1 ; \ - li r10,MSR_RI ; \ - mtmsrd r10,1 ; /* Set RI (EE=0) */ \ - b system_call_common ; + b system_call_common +#endif + .endif + +#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH + /* Fast LE/BE switch system call */ +1: mfspr r12,SPRN_SRR1 + xori r12,r12,MSR_LE + mtspr SPRN_SRR1,r12 + mr r13,r9 + RFI_TO_USER /* return to userspace */ + b . /* prevent speculative execution */ #endif +.endm EXC_REAL_BEGIN(system_call, 0xc00, 0x100) - SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */ - SYSCALL_FASTENDIAN_TEST - SYSCALL_REAL - SYSCALL_FASTENDIAN + SYSTEM_CALL 0 EXC_REAL_END(system_call, 0xc00, 0x100) EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100) - SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */ - SYSCALL_FASTENDIAN_TEST - SYSCALL_VIRT - SYSCALL_FASTENDIAN + SYSTEM_CALL 1 EXC_VIRT_END(system_call, 0x4c00, 0x100) #ifdef CONFIG_KVM_BOOK3S_64_HANDLER @@ -1053,7 +1647,7 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00) SET_SCRATCH0(r10) std r9,PACA_EXGEN+EX_R9(r13) mfcr r9 - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) + KVM_HANDLER PACA_EXGEN, EXC_STD, 0xc00, 0 #endif @@ -1070,7 +1664,7 @@ EXC_COMMON_BEGIN(h_data_storage_common) std r10,PACA_EXGEN+EX_DAR(r13) mfspr r10,SPRN_HDSISR stw r10,PACA_EXGEN+EX_DSISR(r13) - EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN) + EXCEPTION_COMMON(PACA_EXGEN, 0xe00) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD @@ -1104,65 +1698,55 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) * first, and then eventaully from there to the trampoline to get into virtual * mode. */ -__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early) -__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED) +EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20) + EXCEPTION_PROLOG_0 PACA_EXGEN + b hmi_exception_early +EXC_REAL_END(hmi_exception, 0xe60, 0x20) EXC_VIRT_NONE(0x4e60, 0x20) TRAMP_KVM_HV(PACA_EXGEN, 0xe60) TRAMP_REAL_BEGIN(hmi_exception_early) - EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, 0xe60) + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0xe60, 0, 0, 0 + mfctr r10 /* save ctr, even for !RELOCATABLE */ + BRANCH_TO_C000(r11, hmi_exception_early_common) + +EXC_COMMON_BEGIN(hmi_exception_early_common) + mtctr r10 /* Restore ctr */ + mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ - mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ EXCEPTION_PROLOG_COMMON_1() /* We don't touch AMR here, we never go to virtual mode */ EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD - BRANCH_LINK_TO_FAR(DOTSYM(hmi_exception_realmode)) /* Function call ABI */ + bl hmi_exception_realmode cmpdi cr0,r3,0 - - /* Windup the stack. 
*/ - /* Move original HSRR0 and HSRR1 into the respective regs */ - ld r9,_MSR(r1) - mtspr SPRN_HSRR1,r9 - ld r3,_NIP(r1) - mtspr SPRN_HSRR0,r3 - ld r9,_CTR(r1) - mtctr r9 - ld r9,_XER(r1) - mtxer r9 - ld r9,_LINK(r1) - mtlr r9 - REST_GPR(0, r1) - REST_8GPRS(2, r1) - REST_GPR(10, r1) - ld r11,_CCR(r1) - REST_2GPRS(12, r1) bne 1f - mtcr r11 - REST_GPR(11, r1) - ld r1,GPR1(r1) - HRFI_TO_USER_OR_KERNEL -1: mtcr r11 - REST_GPR(11, r1) - ld r1,GPR1(r1) + EXCEPTION_RESTORE_REGS EXC_HV + HRFI_TO_USER_OR_KERNEL +1: /* * Go to virtual mode and pull the HMI event information from * firmware. */ - .globl hmi_exception_after_realmode -hmi_exception_after_realmode: - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXGEN) - b tramp_real_hmi_exception + EXCEPTION_RESTORE_REGS EXC_HV + EXCEPTION_PROLOG_0 PACA_EXGEN + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0xe60, 0, 0, IRQS_DISABLED + EXCEPTION_PROLOG_2_REAL hmi_exception_common, EXC_HV, 1 EXC_COMMON_BEGIN(hmi_exception_common) -EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception, - ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON) + EXCEPTION_COMMON(PACA_EXGEN, 0xe60) + FINISH_NAP + bl save_nvgprs + RECONCILE_IRQ_STATE(r10, r11) + RUNLATCH_ON + addi r3,r1,STACK_FRAME_OVERHEAD + bl handle_hmi_exception + b ret_from_except EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED) EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED) @@ -1196,7 +1780,7 @@ EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20) EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20) TRAMP_KVM(PACA_EXGEN, 0xf20) EXC_COMMON_BEGIN(altivec_unavailable_common) - EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) + EXCEPTION_COMMON(PACA_EXGEN, 0xf20) #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION beq 1f @@ -1233,7 +1817,7 @@ EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20) EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40) TRAMP_KVM(PACA_EXGEN, 0xf40) EXC_COMMON_BEGIN(vsx_unavailable_common) - EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN) + EXCEPTION_COMMON(PACA_EXGEN, 0xf40) #ifdef CONFIG_VSX BEGIN_FTR_SECTION beq 1f @@ -1309,9 +1893,8 @@ EXC_REAL_NONE(0x1400, 0x100) EXC_VIRT_NONE(0x5400, 0x100) EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) - mtspr SPRN_SPRG_HSCRATCH0,r13 - EXCEPTION_PROLOG_0(PACA_EXGEN) - EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500) + EXCEPTION_PROLOG_0 PACA_EXGEN + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 0, 0x1500, 0, 0, 0 #ifdef CONFIG_PPC_DENORMALISATION mfspr r10,SPRN_HSRR1 @@ -1319,8 +1902,8 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) bne+ denorm_assist #endif - KVMTEST_HV(0x1500) - EXCEPTION_PROLOG_2(denorm_common, EXC_HV) + KVMTEST EXC_HV 0x1500 + EXCEPTION_PROLOG_2_REAL denorm_common, EXC_HV, 1 EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100) #ifdef CONFIG_PPC_DENORMALISATION @@ -1346,12 +1929,11 @@ BEGIN_FTR_SECTION mtmsrd r10 sync -#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1 -#define FMR4(n) FMR2(n) ; FMR2(n+2) -#define FMR8(n) FMR4(n) ; FMR4(n+4) -#define FMR16(n) FMR8(n) ; FMR8(n+8) -#define FMR32(n) FMR16(n) ; FMR16(n+16) - FMR32(0) + .Lreg=0 + .rept 32 + fmr .Lreg,.Lreg + .Lreg=.Lreg+1 + .endr FTR_SECTION_ELSE /* @@ -1363,12 +1945,11 @@ FTR_SECTION_ELSE mtmsrd r10 sync -#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1) -#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2) -#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4) -#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8) -#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16) - XVCPSGNDP32(0) + .Lreg=0 + .rept 32 + 
XVCPSGNDP(.Lreg,.Lreg,.Lreg) + .Lreg=.Lreg+1 + .endr ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) @@ -1379,7 +1960,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) * To denormalise we need to move a copy of the register to itself. * For POWER8 we need to do that for all 64 VSX registers */ - XVCPSGNDP32(32) + .Lreg=32 + .rept 32 + XVCPSGNDP(.Lreg,.Lreg,.Lreg) + .Lreg=.Lreg+1 + .endr + denorm_done: mfspr r11,SPRN_HSRR0 subi r11,r11,4 @@ -1442,7 +2028,7 @@ EXC_VIRT_NONE(0x5800, 0x100) std r12,PACA_EXGEN+EX_R12(r13); \ GET_SCRATCH0(r10); \ std r10,PACA_EXGEN+EX_R13(r13); \ - EXCEPTION_PROLOG_2(soft_nmi_common, _H) + EXCEPTION_PROLOG_2_REAL soft_nmi_common, _H, 1 /* * Branch to soft_nmi_interrupt using the emergency stack. The emergency @@ -1457,9 +2043,11 @@ EXC_COMMON_BEGIN(soft_nmi_common) mr r10,r1 ld r1,PACAEMERGSP(r13) subi r1,r1,INT_FRAME_SIZE - EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900, - system_reset, soft_nmi_interrupt, - ADD_NVGPRS;ADD_RECONCILE) + EXCEPTION_COMMON_STACK(PACA_EXGEN, 0x900) + bl save_nvgprs + RECONCILE_IRQ_STATE(r10, r11) + addi r3,r1,STACK_FRAME_OVERHEAD + bl soft_nmi_interrupt b ret_from_except #else /* CONFIG_PPC_WATCHDOG */ @@ -1477,35 +2065,50 @@ EXC_COMMON_BEGIN(soft_nmi_common) * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return. * This is called with r10 containing the value to OR to the paca field. */ -#define MASKED_INTERRUPT(_H) \ -masked_##_H##interrupt: \ - std r11,PACA_EXGEN+EX_R11(r13); \ - lbz r11,PACAIRQHAPPENED(r13); \ - or r11,r11,r10; \ - stb r11,PACAIRQHAPPENED(r13); \ - cmpwi r10,PACA_IRQ_DEC; \ - bne 1f; \ - lis r10,0x7fff; \ - ori r10,r10,0xffff; \ - mtspr SPRN_DEC,r10; \ - b MASKED_DEC_HANDLER_LABEL; \ -1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK; \ - beq 2f; \ - mfspr r10,SPRN_##_H##SRR1; \ - xori r10,r10,MSR_EE; /* clear MSR_EE */ \ - mtspr SPRN_##_H##SRR1,r10; \ - ori r11,r11,PACA_IRQ_HARD_DIS; \ - stb r11,PACAIRQHAPPENED(r13); \ -2: /* done */ \ - mtcrf 0x80,r9; \ - std r1,PACAR1(r13); \ - ld r9,PACA_EXGEN+EX_R9(r13); \ - ld r10,PACA_EXGEN+EX_R10(r13); \ - ld r11,PACA_EXGEN+EX_R11(r13); \ - /* returns to kernel where r13 must be set up, so don't restore it */ \ - ##_H##RFI_TO_KERNEL; \ - b .; \ - MASKED_DEC_HANDLER(_H) +.macro MASKED_INTERRUPT hsrr + .if \hsrr +masked_Hinterrupt: + .else +masked_interrupt: + .endif + std r11,PACA_EXGEN+EX_R11(r13) + lbz r11,PACAIRQHAPPENED(r13) + or r11,r11,r10 + stb r11,PACAIRQHAPPENED(r13) + cmpwi r10,PACA_IRQ_DEC + bne 1f + lis r10,0x7fff + ori r10,r10,0xffff + mtspr SPRN_DEC,r10 + b MASKED_DEC_HANDLER_LABEL +1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK + beq 2f + .if \hsrr + mfspr r10,SPRN_HSRR1 + xori r10,r10,MSR_EE /* clear MSR_EE */ + mtspr SPRN_HSRR1,r10 + .else + mfspr r10,SPRN_SRR1 + xori r10,r10,MSR_EE /* clear MSR_EE */ + mtspr SPRN_SRR1,r10 + .endif + ori r11,r11,PACA_IRQ_HARD_DIS + stb r11,PACAIRQHAPPENED(r13) +2: /* done */ + mtcrf 0x80,r9 + std r1,PACAR1(r13) + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + /* returns to kernel where r13 must be set up, so don't restore it */ + .if \hsrr + HRFI_TO_KERNEL + .else + RFI_TO_KERNEL + .endif + b . + MASKED_DEC_HANDLER(\hsrr\()) +.endm TRAMP_REAL_BEGIN(stf_barrier_fallback) std r9,PACA_EXRFI+EX_R9(r13) @@ -1612,8 +2215,8 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) * cannot reach these if they are put there. 
*/ USE_FIXED_SECTION(virt_trampolines) - MASKED_INTERRUPT() - MASKED_INTERRUPT(H) + MASKED_INTERRUPT EXC_STD + MASKED_INTERRUPT EXC_HV #ifdef CONFIG_KVM_BOOK3S_64_HANDLER TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) @@ -1746,7 +2349,7 @@ handle_page_fault: addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault cmpdi r3,0 - beq+ 12f + beq+ ret_from_except_lite bl save_nvgprs mr r5,r3 addi r3,r1,STACK_FRAME_OVERHEAD @@ -1761,7 +2364,12 @@ handle_dabr_fault: ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_break -12: b ret_from_except_lite + /* + * do_break() may have changed the NV GPRS while handling a breakpoint. + * If so, we need to restore them with their updated values. Don't use + * ret_from_except_lite here. + */ + b ret_from_except #ifdef CONFIG_PPC_BOOK3S_64 @@ -1791,67 +2399,6 @@ handle_dabr_fault: b ret_from_except /* - * Here we have detected that the kernel stack pointer is bad. - * R9 contains the saved CR, r13 points to the paca, - * r10 contains the (bad) kernel stack pointer, - * r11 and r12 contain the saved SRR0 and SRR1. - * We switch to using an emergency stack, save the registers there, - * and call kernel_bad_stack(), which panics. - */ -bad_stack: - ld r1,PACAEMERGSP(r13) - subi r1,r1,64+INT_FRAME_SIZE - std r9,_CCR(r1) - std r10,GPR1(r1) - std r11,_NIP(r1) - std r12,_MSR(r1) - mfspr r11,SPRN_DAR - mfspr r12,SPRN_DSISR - std r11,_DAR(r1) - std r12,_DSISR(r1) - mflr r10 - mfctr r11 - mfxer r12 - std r10,_LINK(r1) - std r11,_CTR(r1) - std r12,_XER(r1) - SAVE_GPR(0,r1) - SAVE_GPR(2,r1) - ld r10,EX_R3(r3) - std r10,GPR3(r1) - SAVE_GPR(4,r1) - SAVE_4GPRS(5,r1) - ld r9,EX_R9(r3) - ld r10,EX_R10(r3) - SAVE_2GPRS(9,r1) - ld r9,EX_R11(r3) - ld r10,EX_R12(r3) - ld r11,EX_R13(r3) - std r9,GPR11(r1) - std r10,GPR12(r1) - std r11,GPR13(r1) -BEGIN_FTR_SECTION - ld r10,EX_CFAR(r3) - std r10,ORIG_GPR3(r1) -END_FTR_SECTION_IFSET(CPU_FTR_CFAR) - SAVE_8GPRS(14,r1) - SAVE_10GPRS(22,r1) - lhz r12,PACA_TRAP_SAVE(r13) - std r12,_TRAP(r1) - addi r11,r1,INT_FRAME_SIZE - std r11,0(r1) - li r12,0 - std r12,0(r11) - ld r2,PACATOC(r13) - ld r11,exception_marker@toc(r2) - std r12,RESULT(r1) - std r11,STACK_FRAME_OVERHEAD-16(r1) -1: addi r3,r1,STACK_FRAME_OVERHEAD - bl kernel_bad_stack - b 1b -_ASM_NOKPROBE_SYMBOL(bad_stack); - -/* * When doorbell is triggered from system reset wakeup, the message is * not cleared, so it would fire again when EE is enabled. * diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index b5a5c6896019..91d297e696dd 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -900,6 +900,7 @@ p_toc: .8byte __toc_start + 0x8000 - 0b /* * This is where the main kernel code starts. */ +__REF start_here_multiplatform: /* set up the TOC */ bl relative_toc @@ -975,6 +976,7 @@ start_here_multiplatform: RFI b . 
/* prevent speculative execution */ + .previous /* This is where all platforms converge execution */ start_here_common: diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index a293a53b4365..c8d1fa2e9d53 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -366,59 +366,3 @@ void hw_breakpoint_pmu_read(struct perf_event *bp) { /* TODO */ } - -bool dawr_force_enable; -EXPORT_SYMBOL_GPL(dawr_force_enable); - -static ssize_t dawr_write_file_bool(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct arch_hw_breakpoint null_brk = {0, 0, 0}; - size_t rc; - - /* Send error to user if they hypervisor won't allow us to write DAWR */ - if ((!dawr_force_enable) && - (firmware_has_feature(FW_FEATURE_LPAR)) && - (set_dawr(&null_brk) != H_SUCCESS)) - return -1; - - rc = debugfs_write_file_bool(file, user_buf, count, ppos); - if (rc) - return rc; - - /* If we are clearing, make sure all CPUs have the DAWR cleared */ - if (!dawr_force_enable) - smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0); - - return rc; -} - -static const struct file_operations dawr_enable_fops = { - .read = debugfs_read_file_bool, - .write = dawr_write_file_bool, - .open = simple_open, - .llseek = default_llseek, -}; - -static int __init dawr_force_setup(void) -{ - dawr_force_enable = false; - - if (cpu_has_feature(CPU_FTR_DAWR)) { - /* Don't setup sysfs file for user control on P8 */ - dawr_force_enable = true; - return 0; - } - - if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { - /* Turn DAWR off by default, but allow admin to turn it on */ - dawr_force_enable = false; - debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, - powerpc_debugfs_root, - &dawr_force_enable, - &dawr_enable_fops); - } - return 0; -} -arch_initcall(dawr_force_setup); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index bc68c53af67c..5645bc9cbc09 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -255,7 +255,7 @@ notrace void arch_local_irq_restore(unsigned long mask) irq_happened = get_irq_happened(); if (!irq_happened) { #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(!(mfmsr() & MSR_EE)); + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); #endif return; } @@ -268,7 +268,7 @@ notrace void arch_local_irq_restore(unsigned long mask) */ if (!(irq_happened & PACA_IRQ_HARD_DIS)) { #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - WARN_ON(!(mfmsr() & MSR_EE)); + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); #endif __hard_irq_disable(); #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG @@ -279,7 +279,7 @@ notrace void arch_local_irq_restore(unsigned long mask) * warn if we are wrong. Only do that when IRQ tracing * is enabled as mfmsr() can be costly. 
*/ - if (WARN_ON(mfmsr() & MSR_EE)) + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) __hard_irq_disable(); #endif } diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index e39536aad30d..a814d2dfb5b0 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -82,8 +82,7 @@ static void flush_erat(void) return; } #endif - /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */ - asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); } #define MCE_FLUSH_SLB 1 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 1ad4089dd110..b55a7b4cb543 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -110,58 +110,6 @@ _ASM_NOKPROBE_SYMBOL(flush_icache_range) EXPORT_SYMBOL(flush_icache_range) /* - * Like above, but only do the D-cache. - * - * flush_dcache_range(unsigned long start, unsigned long stop) - * - * flush all bytes from start to stop-1 inclusive - */ -_GLOBAL_TOC(flush_dcache_range) - -/* - * Flush the data cache to memory - * - * Different systems have different cache line sizes - */ - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - mtctr r8 -0: dcbst 0,r6 - add r6,r6,r7 - bdnz 0b - sync - blr -EXPORT_SYMBOL(flush_dcache_range) - -_GLOBAL(flush_inval_dcache_range) - ld r10,PPC64_CACHES@toc(r2) - lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */ - addi r5,r7,-1 - andc r6,r3,r5 /* round low to line bdy */ - subf r8,r6,r4 /* compute length */ - add r8,r8,r5 /* ensure we get enough */ - lwz r9,DCACHEL1LOGBLOCKSIZE(r10)/* Get log-2 of dcache block size */ - srw. r8,r8,r9 /* compute line count */ - beqlr /* nothing to do? */ - sync - isync - mtctr r8 -0: dcbf 0,r6 - add r6,r6,r7 - bdnz 0b - sync - isync - blr - - -/* * Flush a particular page from the data cache to RAM. * Note: this is necessary because the instruction cache does *not* * snoop from the data cache. 
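The misc_64.S hunk above removes the hand-written flush_dcache_range() and flush_inval_dcache_range() loops (their remaining users in lib/pmem.c are switched to flush_dcache_range() further down). Both routines are plain walks over D-cache blocks: round the start address down to a block boundary, issue dcbst (write back) or dcbf (write back and invalidate) for every block up to the end of the range, then sync. A rough C sketch of that walk, assuming a fixed L1_CACHE_BYTES block size where the removed assembly loads the real block size from ppc64_caches, could look like this (an illustration of the removed logic, not the replacement implementation):

	#include <asm/cache.h>	/* L1_CACHE_BYTES, a stand-in for the runtime block size */

	/* Sketch only: flushes every D-cache block touching [start, stop). */
	static inline void flush_dcache_range_sketch(unsigned long start, unsigned long stop)
	{
		unsigned long addr = start & ~((unsigned long)L1_CACHE_BYTES - 1);

		for (; addr < stop; addr += L1_CACHE_BYTES)
			/* write back and invalidate one block; dcbst would write back only */
			asm volatile("dcbf 0, %0" : : "r" (addr) : "memory");
		asm volatile("sync" : : : "memory");	/* order the flushes before returning */
	}
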
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 991d396fb50d..d7134c614c16 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -160,10 +160,12 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16) - && entry->jump[1] == 0x398c0000 + (val & 0xffff)) - return 1; - return 0; + if (entry->jump[0] != (PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val))) + return 0; + if (entry->jump[1] != (PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | + PPC_LO(val))) + return 0; + return 1; } /* Set up a trampoline in the PLT to bounce us to the distant function */ @@ -188,10 +190,16 @@ static uint32_t do_plt_call(void *location, entry++; } - entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */ - entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/ - entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ - entry->jump[3] = 0x4e800420; /* bctr */ + /* + * lis r12, sym@ha + * addi r12, r12, sym@l + * mtctr r12 + * bctr + */ + entry->jump[0] = PPC_INST_ADDIS | __PPC_RT(R12) | PPC_HA(val); + entry->jump[1] = PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12) | PPC_LO(val); + entry->jump[2] = PPC_INST_MTCTR | __PPC_RS(R12); + entry->jump[3] = PPC_INST_BCTR; pr_debug("Initialized plt for 0x%x at %p\n", val, entry); return (uint32_t)entry; diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index a93b10c48000..007606a48fd9 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -121,20 +121,27 @@ struct ppc64_stub_entry * the stub, but it's significantly shorter to put these values at the * end of the stub code, and patch the stub address (32-bits relative * to the TOC ptr, r2) into the stub. + * + * addis r11,r2, <high> + * addi r11,r11, <low> + * std r2,R2_STACK_OFFSET(r1) + * ld r12,32(r11) + * ld r2,40(r11) + * mtctr r12 + * bctr */ - static u32 ppc64_stub_insns[] = { - 0x3d620000, /* addis r11,r2, <high> */ - 0x396b0000, /* addi r11,r11, <low> */ + PPC_INST_ADDIS | __PPC_RT(R11) | __PPC_RA(R2), + PPC_INST_ADDI | __PPC_RT(R11) | __PPC_RA(R11), /* Save current r2 value in magic place on the stack. */ - 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */ - 0xe98b0020, /* ld r12,32(r11) */ + PPC_INST_STD | __PPC_RS(R2) | __PPC_RA(R1) | R2_STACK_OFFSET, + PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R11) | 32, #ifdef PPC64_ELF_ABI_v1 /* Set up new r2 from function descriptor */ - 0xe84b0028, /* ld r2,40(r11) */ + PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R11) | 40, #endif - 0x7d8903a6, /* mtctr r12 */ - 0x4e800420 /* bctr */ + PPC_INST_MTCTR | __PPC_RS(R12), + PPC_INST_BCTR, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -388,13 +395,6 @@ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000; } -/* Both low and high 16 bits are added as SIGNED additions, so if low - 16 bits has high bit set, high 16 bits must be adjusted. These - macros do that (stolen from binutils). */ -#define PPC_LO(v) ((v) & 0xffff) -#define PPC_HI(v) (((v) >> 16) & 0xffff) -#define PPC_HA(v) PPC_HI ((v) + 0x8000) - /* Patch stub to reference function and correct r2 value. 
*/ static inline int create_stub(const Elf64_Shdr *sechdrs, struct ppc64_stub_entry *entry, @@ -699,18 +699,21 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, * ld r2, ...(r12) * add r2, r2, r12 */ - if ((((uint32_t *)location)[0] & ~0xfffc) - != 0xe84c0000) + if ((((uint32_t *)location)[0] & ~0xfffc) != + (PPC_INST_LD | __PPC_RT(R2) | __PPC_RA(R12))) break; - if (((uint32_t *)location)[1] != 0x7c426214) + if (((uint32_t *)location)[1] != + (PPC_INST_ADD | __PPC_RT(R2) | __PPC_RA(R2) | __PPC_RB(R12))) break; /* * If found, replace it with: * addis r2, r12, (.TOC.-func)@ha - * addi r2, r12, (.TOC.-func)@l + * addi r2, r2, (.TOC.-func)@l */ - ((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value); - ((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value); + ((uint32_t *)location)[0] = PPC_INST_ADDIS | __PPC_RT(R2) | + __PPC_RA(R12) | PPC_HA(value); + ((uint32_t *)location)[1] = PPC_INST_ADDI | __PPC_RT(R2) | + __PPC_RA(R2) | PPC_LO(value); break; case R_PPC64_REL16_HA: @@ -764,12 +767,19 @@ static unsigned long create_ftrace_stub(const Elf64_Shdr *sechdrs, { struct ppc64_stub_entry *entry; unsigned int i, num_stubs; + /* + * ld r12,PACATOC(r13) + * addis r12,r12,<high> + * addi r12,r12,<low> + * mtctr r12 + * bctr + */ static u32 stub_insns[] = { - 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */ - 0x3d8c0000, /* addis r12,r12,<high> */ - 0x398c0000, /* addi r12,r12,<low> */ - 0x7d8903a6, /* mtctr r12 */ - 0x4e800420, /* bctr */ + PPC_INST_LD | __PPC_RT(R12) | __PPC_RA(R13) | PACATOC, + PPC_INST_ADDIS | __PPC_RT(R12) | __PPC_RA(R12), + PPC_INST_ADDI | __PPC_RT(R12) | __PPC_RA(R12), + PPC_INST_MTCTR | __PPC_RS(R12), + PPC_INST_BCTR, }; long reladdr; diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 24522aa37665..409c6c1beabf 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -42,6 +42,8 @@ unsigned int pci_parse_of_flags(u32 addr0, int bridge) if (addr0 & 0x02000000) { flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY; flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64; + if (flags & PCI_BASE_ADDRESS_MEM_TYPE_64) + flags |= IORESOURCE_MEM_64; flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M; if (addr0 & 0x40000000) flags |= IORESOURCE_PREFETCH @@ -77,10 +79,16 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) const __be32 *addrs; u32 i; int proplen; + bool mark_unset = false; addrs = of_get_property(node, "assigned-addresses", &proplen); - if (!addrs) - return; + if (!addrs || !proplen) { + addrs = of_get_property(node, "reg", &proplen); + if (!addrs || !proplen) + return; + mark_unset = true; + } + pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs); for (; proplen >= 20; proplen -= 20, addrs += 5) { flags = pci_parse_of_flags(of_read_number(addrs, 1), 0); @@ -105,6 +113,8 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev) continue; } res->flags = flags; + if (mark_unset) + res->flags |= IORESOURCE_UNSET; res->name = pci_name(dev); region.start = base; region.end = base + size - 1; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index f0fbbf6a6a1f..8fc4de0d22b4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -639,7 +639,7 @@ void do_break (struct pt_regs *regs, unsigned long address, hw_breakpoint_disable(); /* Deliver the signal to userspace */ - force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)address, current); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user 
*)address); } #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ @@ -793,34 +793,6 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) return __set_dabr(dabr, dabrx); } -int set_dawr(struct arch_hw_breakpoint *brk) -{ - unsigned long dawr, dawrx, mrd; - - dawr = brk->address; - - dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) \ - << (63 - 58); //* read/write bits */ - dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) \ - << (63 - 59); //* translate */ - dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) \ - >> 3; //* PRIM bits */ - /* dawr length is stored in field MDR bits 48:53. Matches range in - doublewords (64 bits) baised by -1 eg. 0b000000=1DW and - 0b111111=64DW. - brk->len is in bytes. - This aligns up to double word size, shifts and does the bias. - */ - mrd = ((brk->len + 7) >> 3) - 1; - dawrx |= (mrd & 0x3f) << (63 - 53); - - if (ppc_md.set_dawr) - return ppc_md.set_dawr(dawr, dawrx); - mtspr(SPRN_DAWR, dawr); - mtspr(SPRN_DAWRX, dawrx); - return 0; -} - void __set_breakpoint(struct arch_hw_breakpoint *brk) { memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index ed446b7ea164..514707ef6779 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,7 @@ static unsigned long __prombss prom_tce_alloc_end; #ifdef CONFIG_PPC_PSERIES static bool __prombss prom_radix_disable; +static bool __prombss prom_xive_disable; #endif struct platform_support { @@ -804,6 +805,12 @@ static void __init early_cmdline_parse(void) } if (prom_radix_disable) prom_debug("Radix disabled from cmdline\n"); + + opt = prom_strstr(prom_cmd_line, "xive=off"); + if (opt) { + prom_xive_disable = true; + prom_debug("XIVE disabled from cmdline\n"); + } #endif /* CONFIG_PPC_PSERIES */ } @@ -1212,10 +1219,17 @@ static void __init prom_parse_xive_model(u8 val, switch (val) { case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */ prom_debug("XIVE - either mode supported\n"); - support->xive = true; + support->xive = !prom_xive_disable; break; case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */ prom_debug("XIVE - exploitation mode supported\n"); + if (prom_xive_disable) { + /* + * If we __have__ to do XIVE, we're better off ignoring + * the command line rather than not booting. 
+ */ + prom_printf("WARNING: Ignoring cmdline option xive=off\n"); + } support->xive = true; break; case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */ @@ -1562,9 +1576,6 @@ static void __init reserve_mem(u64 base, u64 size) static void __init prom_init_mem(void) { phandle node; -#ifdef DEBUG_PROM - char *path; -#endif char type[64]; unsigned int plen; cell_t *p, *endp; @@ -1586,9 +1597,6 @@ static void __init prom_init_mem(void) prom_debug("root_size_cells: %x\n", rsc); prom_debug("scanning memory:\n"); -#ifdef DEBUG_PROM - path = prom_scratch; -#endif for (node = 0; prom_next_node(&node); ) { type[0] = 0; @@ -1613,9 +1621,10 @@ static void __init prom_init_mem(void) endp = p + (plen / sizeof(cell_t)); #ifdef DEBUG_PROM - memset(path, 0, sizeof(prom_scratch)); - call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1); - prom_debug(" node %s :\n", path); + memset(prom_scratch, 0, sizeof(prom_scratch)); + call_prom("package-to-path", 3, 1, node, prom_scratch, + sizeof(prom_scratch) - 1); + prom_debug(" node %s :\n", prom_scratch); #endif /* DEBUG_PROM */ while ((endp - p) >= (rac + rsc)) { diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 684b0b315c32..8c92febf5f44 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -2521,7 +2521,6 @@ void ptrace_disable(struct task_struct *child) { /* make sure the single step bit is not set. */ user_disable_single_step(child); - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); } #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index b824f4c69622..5faf0a64c92b 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -980,18 +980,16 @@ int rtas_ibm_suspend_me(u64 handle) cpu_hotplug_disable(); /* Check if we raced with a CPU-Offline Operation */ - if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) { - pr_err("%s: Raced against a concurrent CPU-Offline\n", - __func__); - atomic_set(&data.error, -EBUSY); + if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) { + pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__); + atomic_set(&data.error, -EAGAIN); goto out_hotplug_enable; } /* Call function on all CPUs. 
One of us will make the * rtas call */ - if (on_each_cpu(rtas_percpu_suspend_me, &data, 0)) - atomic_set(&data.error, -EINVAL); + on_each_cpu(rtas_percpu_suspend_me, &data, 0); wait_for_completion(&done); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index a2b74e057904..f50b708d6d77 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1245,7 +1245,7 @@ SYSCALL_DEFINE0(rt_sigreturn) current->comm, current->pid, rt_sf, regs->nip, regs->link); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -1334,7 +1334,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, current->comm, current->pid, ctx, regs->nip, regs->link); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -1512,6 +1512,6 @@ badframe: current->comm, current->pid, addr, regs->nip, regs->link); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 4292ea39baa4..2f80e270c7b0 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -808,7 +808,7 @@ badframe: current->comm, current->pid, "rt_sigreturn", (long)uc, regs->nip, regs->link); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index c612d50c9d18..b84992c10854 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -7,6 +7,7 @@ */ #include <linux/mm.h> +#include <linux/suspend.h> #include <asm/page.h> #include <asm/sections.h> diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index 7a919e9a3400..cbdf86228eaa 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -25,11 +25,19 @@ #define SL_IBAT2 0x48 #define SL_DBAT3 0x50 #define SL_IBAT3 0x58 -#define SL_TB 0x60 -#define SL_R2 0x68 -#define SL_CR 0x6c -#define SL_LR 0x70 -#define SL_R12 0x74 /* r12 to r31 */ +#define SL_DBAT4 0x60 +#define SL_IBAT4 0x68 +#define SL_DBAT5 0x70 +#define SL_IBAT5 0x78 +#define SL_DBAT6 0x80 +#define SL_IBAT6 0x88 +#define SL_DBAT7 0x90 +#define SL_IBAT7 0x98 +#define SL_TB 0xa0 +#define SL_R2 0xa8 +#define SL_CR 0xac +#define SL_LR 0xb0 +#define SL_R12 0xb4 /* r12 to r31 */ #define SL_SIZE (SL_R12 + 80) .section .data @@ -114,6 +122,41 @@ _GLOBAL(swsusp_arch_suspend) mfibatl r4,3 stw r4,SL_IBAT3+4(r11) +BEGIN_MMU_FTR_SECTION + mfspr r4,SPRN_DBAT4U + stw r4,SL_DBAT4(r11) + mfspr r4,SPRN_DBAT4L + stw r4,SL_DBAT4+4(r11) + mfspr r4,SPRN_DBAT5U + stw r4,SL_DBAT5(r11) + mfspr r4,SPRN_DBAT5L + stw r4,SL_DBAT5+4(r11) + mfspr r4,SPRN_DBAT6U + stw r4,SL_DBAT6(r11) + mfspr r4,SPRN_DBAT6L + stw r4,SL_DBAT6+4(r11) + mfspr r4,SPRN_DBAT7U + stw r4,SL_DBAT7(r11) + mfspr r4,SPRN_DBAT7L + stw r4,SL_DBAT7+4(r11) + mfspr r4,SPRN_IBAT4U + stw r4,SL_IBAT4(r11) + mfspr r4,SPRN_IBAT4L + stw r4,SL_IBAT4+4(r11) + mfspr r4,SPRN_IBAT5U + stw r4,SL_IBAT5(r11) + mfspr r4,SPRN_IBAT5L + stw r4,SL_IBAT5+4(r11) + mfspr r4,SPRN_IBAT6U + stw r4,SL_IBAT6(r11) + mfspr r4,SPRN_IBAT6L + stw r4,SL_IBAT6+4(r11) + mfspr r4,SPRN_IBAT7U + stw r4,SL_IBAT7(r11) + mfspr r4,SPRN_IBAT7L + stw r4,SL_IBAT7+4(r11) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + #if 0 /* Backup various CPU config stuffs */ bl __save_cpu_setup @@ -279,27 +322,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtibatu 3,r4 lwz r4,SL_IBAT3+4(r11) mtibatl 3,r4 -#endif - BEGIN_MMU_FTR_SECTION - li r4,0 + lwz r4,SL_DBAT4(r11) mtspr SPRN_DBAT4U,r4 + lwz 
r4,SL_DBAT4+4(r11) mtspr SPRN_DBAT4L,r4 + lwz r4,SL_DBAT5(r11) mtspr SPRN_DBAT5U,r4 + lwz r4,SL_DBAT5+4(r11) mtspr SPRN_DBAT5L,r4 + lwz r4,SL_DBAT6(r11) mtspr SPRN_DBAT6U,r4 + lwz r4,SL_DBAT6+4(r11) mtspr SPRN_DBAT6L,r4 + lwz r4,SL_DBAT7(r11) mtspr SPRN_DBAT7U,r4 + lwz r4,SL_DBAT7+4(r11) mtspr SPRN_DBAT7L,r4 + lwz r4,SL_IBAT4(r11) mtspr SPRN_IBAT4U,r4 + lwz r4,SL_IBAT4+4(r11) mtspr SPRN_IBAT4L,r4 + lwz r4,SL_IBAT5(r11) mtspr SPRN_IBAT5U,r4 + lwz r4,SL_IBAT5+4(r11) mtspr SPRN_IBAT5L,r4 + lwz r4,SL_IBAT6(r11) mtspr SPRN_IBAT6U,r4 + lwz r4,SL_IBAT6+4(r11) mtspr SPRN_IBAT6L,r4 + lwz r4,SL_IBAT7(r11) mtspr SPRN_IBAT7U,r4 + lwz r4,SL_IBAT7+4(r11) mtspr SPRN_IBAT7L,r4 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) +#endif /* Flush all TLBs */ lis r4,0x1000 diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 103655d84b4b..f2c3bda2d39f 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -515,3 +515,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 9fabdce255cd..6ba0fdd1e7f8 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -148,7 +148,7 @@ _GLOBAL(tm_reclaim) /* Stash the stack pointer away for use after reclaim */ std r1, PACAR1(r13) - /* Clear MSR RI since we are about to change r1, EE is already off. */ + /* Clear MSR RI since we are about to use SCRATCH0, EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -474,7 +474,7 @@ restore_gprs: REST_GPR(7, r7) - /* Clear MSR RI since we are about to change r1. EE is already off */ + /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ li r5, 0 mtmsrd r5, 1 diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 517662a56bdc..be1ca98fce5c 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -866,10 +866,6 @@ void arch_ftrace_update_code(int command) #ifdef CONFIG_PPC64 #define PACATOC offsetof(struct paca_struct, kernel_toc) -#define PPC_LO(v) ((v) & 0xffff) -#define PPC_HI(v) (((v) >> 16) & 0xffff) -#define PPC_HA(v) PPC_HI ((v) + 0x8000) - extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; int __init ftrace_dyn_arch_init(void) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 47df30982de1..11caa0291254 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -297,7 +297,7 @@ NOKPROBE_SYMBOL(die); void user_single_step_report(struct pt_regs *regs) { - force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip, current); + force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip); } static void show_signal_msg(int signr, struct pt_regs *regs, int code, @@ -363,7 +363,7 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) if (!exception_common(signr, regs, code, addr)) return; - force_sig_fault(signr, code, (void __user *)addr, current); + force_sig_fault(signr, code, (void __user *)addr); } /* diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index f53997a8ca62..711fca9bc6f0 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -38,6 +38,7 @@ config KVM_BOOK3S_32_HANDLER config KVM_BOOK3S_64_HANDLER bool select KVM_BOOK3S_HANDLER + select PPC_DAWR_FORCE_ENABLE config KVM_BOOK3S_PR_POSSIBLE bool @@ -183,9 +184,9 @@ config KVM_MPIC select HAVE_KVM_MSI help Enable 
support for emulating MPIC devices inside the - host kernel, rather than relying on userspace to emulate. - Currently, support is limited to certain versions of - Freescale's MPIC implementation. + host kernel, rather than relying on userspace to emulate. + Currently, support is limited to certain versions of + Freescale's MPIC implementation. config KVM_XICS bool "KVM in-kernel XICS emulation" diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 08b2dfbc5305..2d415c36a61d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -361,12 +361,6 @@ static void kvmppc_pte_free(pte_t *ptep) kmem_cache_free(kvm_pte_cache, ptep); } -/* Like pmd_huge() and pmd_large(), but works regardless of config options */ -static inline int pmd_is_leaf(pmd_t pmd) -{ - return !!(pmd_val(pmd) & _PAGE_PTE); -} - static pmd_t *kvmppc_pmd_alloc(void) { return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); @@ -487,7 +481,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud, for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) { if (!pud_present(*p)) continue; - if (pud_huge(*p)) { + if (pud_is_leaf(*p)) { pud_clear(p); } else { pmd_t *pmd; @@ -586,7 +580,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, new_pud = pud_alloc_one(kvm->mm, gpa); pmd = NULL; - if (pud && pud_present(*pud) && !pud_huge(*pud)) + if (pud && pud_present(*pud) && !pud_is_leaf(*pud)) pmd = pmd_offset(pud, gpa); else if (level <= 1) new_pmd = kvmppc_pmd_alloc(); @@ -609,7 +603,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, new_pud = NULL; } pud = pud_offset(pgd, gpa); - if (pud_huge(*pud)) { + if (pud_is_leaf(*pud)) { unsigned long hgpa = gpa & PUD_MASK; /* Check if we raced and someone else has set the same thing */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 76b1801aa44a..ec1804f822af 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3603,6 +3603,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.slb_max = 0; dec = mfspr(SPRN_DEC); + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ + dec = (s32) dec; tb = mftb(); vcpu->arch.dec_expires = dec + tb; vcpu->cpu = -1; @@ -4122,8 +4124,15 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, preempt_enable(); - /* cancel pending decrementer exception if DEC is now positive */ - if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) + /* + * cancel pending decrementer exception if DEC is now positive, or if + * entering a nested guest in which case the decrementer is now owned + * by L2 and the L1 decrementer is provided in hdec_expires + */ + if (kvmppc_core_pending_dec(vcpu) && + ((get_tb() < vcpu->arch.dec_expires) || + (trap == BOOK3S_INTERRUPT_SYSCALL && + kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) kvmppc_core_dequeue_dec(vcpu); trace_kvm_guest_exit(vcpu); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index cb05ccc8bc6a..7c1909657b55 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -820,6 +820,8 @@ static void flush_guest_tlb(struct kvm *kvm) : : "r" (rb), "i" (1), "i" (1), "i" (0), "r" (0) : "memory"); } + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); } else { for (set = 0; set < kvm->arch.tlb_sets; ++set) { /* R=0 PRS=0 RIC=0 */ @@ -828,9 +830,9 @@ static void 
flush_guest_tlb(struct kvm *kvm) "r" (0) : "memory"); rb += PPC_BIT(51); /* increment set number */ } + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); } - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index 229496e2652e..0db937497169 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -128,7 +128,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* Set CR0 to indicate previous transactional state */ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | - (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); /* L=1 => tresume, L=0 => tsuspend */ if (instr & (1 << 21)) { if (MSR_TM_SUSPENDED(msr)) @@ -172,7 +172,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) /* Set CR0 to indicate previous transactional state */ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | - (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr &= ~MSR_TS_MASK; return RESUME_GUEST; @@ -202,7 +202,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) /* Set CR0 to indicate previous transactional state */ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | - (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr = msr | MSR_TS_S; return RESUME_GUEST; } diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index e8276161872e..381bf8dea193 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -827,7 +827,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) * * Note: If EOI is incorrectly used by SW to lower the CPPR * value (ie more favored), we do not check for rejection of - * a pending interrupt, this is a SW error and PAPR sepcifies + * a pending interrupt, this is a SW error and PAPR specifies * that we don't have to deal with it. 
* * The sending of an EOI to the ICS is handled after the diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6d704ad2472b..0dba7eb24f92 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -414,9 +414,9 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_check_processor_compat(void *rtn) +int kvm_arch_check_processor_compat(void) { - *(int *)rtn = kvmppc_core_check_processor_compat(); + return kvmppc_core_check_processor_compat(); } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index c55f9c27bf79..eebc782d89a5 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -49,7 +49,8 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ obj-y += checksum_$(BITS).o checksum_wrappers.o \ string_$(BITS).o -obj-y += sstep.o ldstfp.o quad.o +obj-y += sstep.o +obj-$(CONFIG_PPC_FPU) += ldstfp.o obj64-y += quad.o obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index e32f477d4426..e00abeabc54d 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -14,8 +14,6 @@ #include <asm/asm-compat.h> #include <linux/errno.h> -#ifdef CONFIG_PPC_FPU - #define STKFRM (PPC_MIN_STKFRM + 16) /* Get the contents of frN into *p; N is in r3 and p is in r4. */ @@ -237,5 +235,3 @@ _GLOBAL(conv_dp_to_sp) MTMSRD(r6) isync blr - -#endif /* CONFIG_PPC_FPU */ diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c index 3c6c134224f8..377712e85605 100644 --- a/arch/powerpc/lib/pmem.c +++ b/arch/powerpc/lib/pmem.c @@ -15,14 +15,14 @@ void arch_wb_cache_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_inval_dcache_range(start, start + size); + flush_dcache_range(start, start + size); } EXPORT_SYMBOL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { unsigned long start = (unsigned long) addr; - flush_inval_dcache_range(start, start + size); + flush_dcache_range(start, start + size); } EXPORT_SYMBOL(arch_invalidate_pmem); @@ -35,7 +35,7 @@ long __copy_from_user_flushcache(void *dest, const void __user *src, unsigned long copied, start = (unsigned long) dest; copied = __copy_from_user(dest, src, size); - flush_inval_dcache_range(start, start + size); + flush_dcache_range(start, start + size); return copied; } @@ -45,7 +45,7 @@ void *memcpy_flushcache(void *dest, const void *src, size_t size) unsigned long start = (unsigned long) dest; memcpy(dest, src, size); - flush_inval_dcache_range(start, start + size); + flush_dcache_range(start, start + size); return dest; } diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile index 974b4fc19f4f..fd393b8be14f 100644 --- a/arch/powerpc/mm/book3s64/Makefile +++ b/arch/powerpc/mm/book3s64/Makefile @@ -10,7 +10,6 @@ obj-$(CONFIG_PPC_NATIVE) += hash_native.o obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o -obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o ifdef CONFIG_HUGETLB_PAGE obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 30d62ffe3310..90ab4f31e2b3 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -41,7 +41,7 @@ #define HPTE_LOCK_BIT (56+3) #endif -DEFINE_RAW_SPINLOCK(native_tlbie_lock); 
+static DEFINE_RAW_SPINLOCK(native_tlbie_lock); static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) { @@ -56,7 +56,7 @@ static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is) * tlbiel instruction for hash, set invalidation * i.e., r=1 and is=01 or is=10 or is=11 */ -static inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, +static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is, unsigned int pid, unsigned int ric, unsigned int prs) { @@ -112,7 +112,7 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) asm volatile("ptesync": : :"memory"); - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); } void hash__tlbiel_all(unsigned int action) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 28ced26f2a00..9a5963e07a82 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -684,10 +684,8 @@ static void __init htab_init_page_sizes(void) if (mmu_psize_defs[MMU_PAGE_16M].shift && memblock_phys_mem_size() >= 0x40000000) mmu_vmemmap_psize = MMU_PAGE_16M; - else if (mmu_psize_defs[MMU_PAGE_64K].shift) - mmu_vmemmap_psize = MMU_PAGE_64K; else - mmu_vmemmap_psize = MMU_PAGE_4K; + mmu_vmemmap_psize = mmu_virtual_psize; #endif /* CONFIG_SPARSEMEM_VMEMMAP */ printk(KERN_DEBUG "Page orders: linear mapping = %d, " @@ -981,7 +979,7 @@ void __init hash__early_init_devtree(void) htab_scan_page_sizes(); } -struct hash_mm_context init_hash_mm_context; +static struct hash_mm_context init_hash_mm_context; void __init hash__early_init_mmu(void) { #ifndef CONFIG_PPC_64K_PAGES diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index bb70391401f7..2d0cb5ba9a47 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -50,20 +50,52 @@ EXPORT_SYMBOL_GPL(hash__alloc_context_id); void slb_setup_new_exec(void); +static int realloc_context_ids(mm_context_t *ctx) +{ + int i, id; + + /* + * id 0 (aka. ctx->id) is special, we always allocate a new one, even if + * there wasn't one allocated previously (which happens in the exec + * case where ctx is newly allocated). + * + * We have to be a bit careful here. We must keep the existing ids in + * the array, so that we can test if they're non-zero to decide if we + * need to allocate a new one. However in case of error we must free the + * ids we've allocated but *not* any of the existing ones (or risk a + * UAF). That's why we decrement i at the start of the error handling + * loop, to skip the id that we just tested but couldn't reallocate. 
+ */ + for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) { + if (i == 0 || ctx->extended_id[i]) { + id = hash__alloc_context_id(); + if (id < 0) + goto error; + + ctx->extended_id[i] = id; + } + } + + /* The caller expects us to return id */ + return ctx->id; + +error: + for (i--; i >= 0; i--) { + if (ctx->extended_id[i]) + ida_free(&mmu_context_ida, ctx->extended_id[i]); + } + + return id; +} + static int hash__init_new_context(struct mm_struct *mm) { int index; - index = hash__alloc_context_id(); - if (index < 0) - return index; - mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), GFP_KERNEL); - if (!mm->context.hash_context) { - ida_free(&mmu_context_ida, index); + if (!mm->context.hash_context) return -ENOMEM; - } /* * The old code would re-promote on fork, we don't do that when using @@ -91,13 +123,20 @@ static int hash__init_new_context(struct mm_struct *mm) mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); if (!mm->context.hash_context->spt) { - ida_free(&mmu_context_ida, index); kfree(mm->context.hash_context); return -ENOMEM; } } #endif + } + index = realloc_context_ids(&mm->context); + if (index < 0) { +#ifdef CONFIG_PPC_SUBPAGE_PROT + kfree(mm->context.hash_context->spt); +#endif + kfree(mm->context.hash_context); + return index; } pkey_mm_init(mm); @@ -135,7 +174,6 @@ static int radix__init_new_context(struct mm_struct *mm) */ asm volatile("ptesync;isync" : : : "memory"); - mm->context.npu_context = NULL; mm->context.hash_context = NULL; return index; diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 01bc9663360d..7d0e0d0d22c4 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -72,7 +72,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp))); assert_spin_locked(pmd_lockptr(mm, pmdp)); - WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd))); + WARN_ON(!(pmd_large(pmd))); #endif trace_hugepage_set_pmd(addr, pmd_val(pmd)); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); @@ -446,3 +446,24 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, return true; } + +int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) +{ + unsigned long i; + + if (radix_enabled()) + return radix__ioremap_range(ea, pa, size, prot, nid); + + for (i = 0; i < size; i += PAGE_SIZE) { + int err = map_kernel_page(ea + i, pa + i, prot); + if (err) { + if (slab_is_available()) + unmap_kernel_range(ea, size); + else + WARN_ON_ONCE(1); /* Should clean up */ + return err; + } + } + + return 0; +} diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 273ae66a9a45..65c2ba1e1783 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "radix-mmu: " fmt +#include <linux/io.h> #include <linux/kernel.h> #include <linux/sched/mm.h> #include <linux/memblock.h> @@ -198,14 +199,14 @@ void radix__change_memory_range(unsigned long start, unsigned long end, pudp = pud_alloc(&init_mm, pgdp, idx); if (!pudp) continue; - if (pud_huge(*pudp)) { + if (pud_is_leaf(*pudp)) { ptep = (pte_t *)pudp; goto update_the_pte; } pmdp = pmd_alloc(&init_mm, pudp, idx); if (!pmdp) continue; - if (pmd_huge(*pmdp)) { + if (pmd_is_leaf(*pmdp)) { ptep = pmdp_ptep(pmdp); goto update_the_pte; } @@ -319,7 +320,7 @@ static int __meminit 
create_physical_mapping(unsigned long start, return 0; } -void __init radix_init_pgtable(void) +static void __init radix_init_pgtable(void) { unsigned long rts_field; struct memblock_region *reg; @@ -515,14 +516,6 @@ void __init radix__early_init_devtree(void) mmu_psize_defs[MMU_PAGE_64K].shift = 16; mmu_psize_defs[MMU_PAGE_64K].ap = 0x5; found: -#ifdef CONFIG_SPARSEMEM_VMEMMAP - if (mmu_psize_defs[MMU_PAGE_2M].shift) { - /* - * map vmemmap using 2M if available - */ - mmu_vmemmap_psize = MMU_PAGE_2M; - } -#endif /* CONFIG_SPARSEMEM_VMEMMAP */ return; } @@ -587,7 +580,13 @@ void __init radix__early_init_mmu(void) #ifdef CONFIG_SPARSEMEM_VMEMMAP /* vmemmap mapping */ - mmu_vmemmap_psize = mmu_virtual_psize; + if (mmu_psize_defs[MMU_PAGE_2M].shift) { + /* + * map vmemmap using 2M if available + */ + mmu_vmemmap_psize = MMU_PAGE_2M; + } else + mmu_vmemmap_psize = mmu_virtual_psize; #endif /* * initialize page table size @@ -832,7 +831,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr, if (!pmd_present(*pmd)) continue; - if (pmd_huge(*pmd)) { + if (pmd_is_leaf(*pmd)) { split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd); continue; } @@ -857,7 +856,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr, if (!pud_present(*pud)) continue; - if (pud_huge(*pud)) { + if (pud_is_leaf(*pud)) { split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud); continue; } @@ -883,7 +882,7 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) if (!pgd_present(*pgd)) continue; - if (pgd_huge(*pgd)) { + if (pgd_is_leaf(*pgd)) { split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd); continue; } @@ -1118,3 +1117,123 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, set_pte_at(mm, addr, ptep, pte); } + +int __init arch_ioremap_pud_supported(void) +{ + /* HPT does not cope with large pages in the vmalloc area */ + return radix_enabled(); +} + +int __init arch_ioremap_pmd_supported(void) +{ + return radix_enabled(); +} + +int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) +{ + return 0; +} + +int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) +{ + pte_t *ptep = (pte_t *)pud; + pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot); + + if (!radix_enabled()) + return 0; + + set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud); + + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (pud_huge(*pud)) { + pud_clear(pud); + return 1; + } + + return 0; +} + +int pud_free_pmd_page(pud_t *pud, unsigned long addr) +{ + pmd_t *pmd; + int i; + + pmd = (pmd_t *)pud_page_vaddr(*pud); + pud_clear(pud); + + flush_tlb_kernel_range(addr, addr + PUD_SIZE); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd[i])) { + pte_t *pte; + pte = (pte_t *)pmd_page_vaddr(pmd[i]); + + pte_free_kernel(&init_mm, pte); + } + } + + pmd_free(&init_mm, pmd); + + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) +{ + pte_t *ptep = (pte_t *)pmd; + pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot); + + if (!radix_enabled()) + return 0; + + set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd); + + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (pmd_huge(*pmd)) { + pmd_clear(pmd); + return 1; + } + + return 0; +} + +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) +{ + pte_t *pte; + + pte = (pte_t *)pmd_page_vaddr(*pmd); + pmd_clear(pmd); + + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + + pte_free_kernel(&init_mm, pte); + + return 1; +} + +int radix__ioremap_range(unsigned long ea, phys_addr_t 
pa, unsigned long size, + pgprot_t prot, int nid) +{ + if (likely(slab_is_available())) { + int err = ioremap_page_range(ea, ea + size, pa, prot); + if (err) + unmap_kernel_range(ea, size); + return err; + } else { + unsigned long i; + + for (i = 0; i < size; i += PAGE_SIZE) { + int err = map_kernel_page(ea + i, pa + i, prot); + if (WARN_ON_ONCE(err)) /* Should clean up */ + return err; + } + return 0; + } +} diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index bb9835681315..71f7fede2fa4 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -25,7 +25,7 @@ * tlbiel instruction for radix, set invalidation * i.e., r=1 and is=01 or is=10 or is=11 */ -static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is, +static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is, unsigned int pid, unsigned int ric, unsigned int prs) { @@ -83,7 +83,7 @@ void radix__tlbiel_all(unsigned int action) else WARN(1, "%s called on pre-POWER9 CPU\n", __func__); - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); } static __always_inline void __tlbiel_pid(unsigned long pid, int set, @@ -146,8 +146,8 @@ static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) trace_tlbie(lpid, 0, rb, rs, ric, prs, r); } -static inline void __tlbiel_lpid_guest(unsigned long lpid, int set, - unsigned long ric) +static __always_inline void __tlbiel_lpid_guest(unsigned long lpid, int set, + unsigned long ric) { unsigned long rb,rs,prs,r; @@ -163,8 +163,8 @@ static inline void __tlbiel_lpid_guest(unsigned long lpid, int set, } -static inline void __tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) +static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -179,8 +179,8 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid, trace_tlbie(0, 1, rb, rs, ric, prs, r); } -static inline void __tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap, unsigned long ric) +static __always_inline void __tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -195,8 +195,8 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid, trace_tlbie(0, 0, rb, rs, ric, prs, r); } -static inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, - unsigned long ap, unsigned long ric) +static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, + unsigned long ap, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -235,7 +235,7 @@ static inline void fixup_tlbie_lpid(unsigned long lpid) /* * We use 128 set in radix mode and 256 set in hpt mode. 
*/ -static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) +static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; @@ -258,7 +258,7 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) __tlbiel_pid(pid, set, RIC_FLUSH_TLB); asm volatile("ptesync": : :"memory"); - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); + asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory"); } static inline void _tlbie_pid(unsigned long pid, unsigned long ric) @@ -310,7 +310,7 @@ static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric) __tlbiel_lpid(lpid, set, RIC_FLUSH_TLB); asm volatile("ptesync": : :"memory"); - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); + asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST "; isync" : : :"memory"); } static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) @@ -337,7 +337,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric) +static __always_inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric) { int set; @@ -362,7 +362,7 @@ static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric) __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB); asm volatile("ptesync": : :"memory"); - asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); + asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); } @@ -377,8 +377,8 @@ static inline void __tlbiel_va_range(unsigned long start, unsigned long end, __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); } -static inline void _tlbiel_va(unsigned long va, unsigned long pid, - unsigned long psize, unsigned long ric) +static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid, + unsigned long psize, unsigned long ric) { unsigned long ap = mmu_get_ap(psize); @@ -409,8 +409,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end, __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); } -static inline void _tlbie_va(unsigned long va, unsigned long pid, - unsigned long psize, unsigned long ric) +static __always_inline void _tlbie_va(unsigned long va, unsigned long pid, + unsigned long psize, unsigned long ric) { unsigned long ap = mmu_get_ap(psize); @@ -420,7 +420,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, +static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long psize, unsigned long ric) { unsigned long ap = mmu_get_ap(psize); @@ -666,6 +666,11 @@ EXPORT_SYMBOL(radix__flush_tlb_page); #define radix__flush_all_mm radix__local_flush_all_mm #endif /* CONFIG_SMP */ +/* + * If kernel TLBIs ever become local rather than global, then + * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it + * assumes kernel TLBIs are global. + */ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) { _tlbie_pid(0, RIC_FLUSH_ALL); diff --git a/arch/powerpc/mm/book3s64/vphn.h b/arch/powerpc/mm/book3s64/vphn.h deleted file mode 100644 index f0b93c2dd578..000000000000 --- a/arch/powerpc/mm/book3s64/vphn.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ARCH_POWERPC_MM_VPHN_H_ -#define _ARCH_POWERPC_MM_VPHN_H_ - -/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. 
*/ -#define VPHN_REGISTER_COUNT 6 - -/* - * 6 64-bit registers unpacked into up to 24 be32 associativity values. To - * form the complete property we have to add the length in the first cell. - */ -#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1) - -extern int vphn_unpack_associativity(const long *packed, __be32 *unpacked); - -#endif diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index ec6b7ad70659..d989592b6fc8 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -178,13 +178,12 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, if (fault & VM_FAULT_HWPOISON) lsb = PAGE_SHIFT; - force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, - current); + force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb); return 0; } #endif - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); return 0; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index b5d92dc32844..a8953f108808 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -61,12 +61,17 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, num_hugepd = 1; } + if (!cachep) { + WARN_ONCE(1, "No page table cache created for hugetlb tables"); + return -ENOMEM; + } + new = kmem_cache_alloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL)); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); - if (! new) + if (!new) return -ENOMEM; /* @@ -130,6 +135,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz } else { pdshift = PUD_SHIFT; pu = pud_alloc(mm, pg, addr); + if (!pu) + return NULL; if (pshift == PUD_SHIFT) return (pte_t *)pu; else if (pshift > PMD_SHIFT) { @@ -138,6 +145,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz } else { pdshift = PMD_SHIFT; pm = pmd_alloc(mm, pu, addr); + if (!pm) + return NULL; if (pshift == PMD_SHIFT) /* 16MB hugepage */ return (pte_t *)pm; @@ -154,12 +163,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz } else { pdshift = PUD_SHIFT; pu = pud_alloc(mm, pg, addr); + if (!pu) + return NULL; if (pshift >= PUD_SHIFT) { ptl = pud_lockptr(mm, pu); hpdp = (hugepd_t *)pu; } else { pdshift = PMD_SHIFT; pm = pmd_alloc(mm, pu, addr); + if (!pm) + return NULL; ptl = pmd_lockptr(mm, pm); hpdp = (hugepd_t *)pm; } @@ -511,13 +524,6 @@ retry: return page; } -static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, - unsigned long sz) -{ - unsigned long __boundary = (addr + sz) & ~(sz-1); - return (__boundary - 1 < end - 1) ? __boundary : end; -} - #ifdef CONFIG_PPC_MM_SLICES unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -588,6 +594,7 @@ __setup("hugepagesz=", hugepage_setup_sz); static int __init hugetlbpage_init(void) { + bool configured = false; int psize; if (hugetlb_disabled) { @@ -638,10 +645,15 @@ static int __init hugetlbpage_init(void) pgtable_cache_add(pdshift - shift); else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx)) pgtable_cache_add(PTE_T_ORDER); + + configured = true; } - if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) - hugetlbpage_init_default(); + if (configured) { + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) + hugetlbpage_init_default(); + } else + pr_info("Failed to initialize. 
Disabling HugeTLB"); return 0; } @@ -665,68 +677,3 @@ void flush_dcache_icache_hugepage(struct page *page) } } } - -static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long pte_end; - struct page *head, *page; - pte_t pte; - int refs; - - pte_end = (addr + sz) & ~(sz-1); - if (pte_end < end) - end = pte_end; - - pte = READ_ONCE(*ptep); - - if (!pte_access_permitted(pte, write)) - return 0; - - /* hugepages are never "special" */ - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - refs = 0; - head = pte_page(pte); - - page = head + ((addr & (sz-1)) >> PAGE_SHIFT); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - /* Could be optimized better */ - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - -int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, - unsigned long end, int write, struct page **pages, int *nr) -{ - pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(hugepd); - unsigned long next; - - ptep = hugepte_offset(hugepd, addr, pdshift); - do { - next = hugepte_addr_end(addr, end, sz); - if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) - return 0; - } while (ptep++, addr = next, addr != end); - - return 1; -} diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a4e17a979e45..a44f6281ca3a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -194,8 +194,11 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, * fail due to alignment issues when using 16MB hugepages, so * fall back to system memory if the altmap allocation fail. 
*/ - if (altmap) + if (altmap) { p = altmap_alloc_block_buf(page_size, altmap); + if (!p) + pr_debug("altmap block allocation failed, falling back to system memory"); + } if (!p) p = vmemmap_alloc_block_buf(page_size, node); if (!p) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2540d3b2588c..6d5f0fc76666 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -120,7 +120,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, start, start + size, rc); return -EFAULT; } - flush_inval_dcache_range(start, start + size); + flush_dcache_range(start, start + size); return __add_pages(nid, start_pfn, nr_pages, restrictions); } @@ -146,7 +146,7 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); - flush_inval_dcache_range(start, start + size); + flush_dcache_range(start, start + size); ret = remove_section_mapping(start, start + size); WARN_ON_ONCE(ret); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 917904d2fe97..50d68d21ddcc 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -163,6 +163,22 @@ static void unmap_cpu_from_node(unsigned long cpu) } #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */ +int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) +{ + int dist = 0; + + int i, index; + + for (i = 0; i < distance_ref_points_depth; i++) { + index = be32_to_cpu(distance_ref_points[i]); + if (cpu1_assoc[index] == cpu2_assoc[index]) + break; + dist++; + } + + return dist; +} + /* must hold reference to node during call */ static const __be32 *of_get_associativity(struct device_node *dev) { @@ -212,7 +228,7 @@ static int associativity_to_nid(const __be32 *associativity) { int nid = NUMA_NO_NODE; - if (min_common_depth == -1) + if (!numa_enabled) goto out; if (of_read_number(associativity, 1) >= min_common_depth) @@ -416,17 +432,19 @@ static int of_get_assoc_arrays(struct assoc_arrays *aa) static int of_drconf_to_nid_single(struct drmem_lmb *lmb) { struct assoc_arrays aa = { .arrays = NULL }; - int default_nid = 0; + int default_nid = NUMA_NO_NODE; int nid = default_nid; int rc, index; + if ((min_common_depth < 0) || !numa_enabled) + return default_nid; + rc = of_get_assoc_arrays(&aa); if (rc) return default_nid; - if (min_common_depth > 0 && min_common_depth <= aa.array_sz && - !(lmb->flags & DRCONF_MEM_AI_INVALID) && - lmb->aa_index < aa.n_arrays) { + if (min_common_depth <= aa.array_sz && + !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) { index = lmb->aa_index * aa.array_sz + min_common_depth - 1; nid = of_read_number(&aa.arrays[index], 1); @@ -626,8 +644,14 @@ static int __init parse_numa_properties(void) min_common_depth = find_min_common_depth(); - if (min_common_depth < 0) + if (min_common_depth < 0) { + /* + * if we fail to parse min_common_depth from device tree + * mark the numa disabled, boot with numa disabled. 
+ */ + numa_enabled = false; return min_common_depth; + } dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth); @@ -743,7 +767,7 @@ void __init dump_numa_cpu_topology(void) unsigned int node; unsigned int cpu, count; - if (min_common_depth == -1 || !numa_enabled) + if (!numa_enabled) return; for_each_online_node(node) { @@ -808,7 +832,7 @@ static void __init find_possible_nodes(void) struct device_node *rtas; u32 numnodes, i; - if (min_common_depth <= 0) + if (!numa_enabled) return; rtas = of_find_node_by_path("/rtas"); @@ -1010,7 +1034,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) struct device_node *memory = NULL; int nid; - if (!numa_enabled || (min_common_depth < 0)) + if (!numa_enabled) return first_online_node; memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); @@ -1063,9 +1087,6 @@ u64 memory_hotplug_max(void) /* Virtual Processor Home Node (VPHN) support */ #ifdef CONFIG_PPC_SPLPAR - -#include "book3s64/vphn.h" - struct topology_update_data { struct topology_update_data *next; unsigned int cpu; @@ -1161,25 +1182,13 @@ static int update_cpu_associativity_changes_mask(void) * Retrieve the new associativity information for a virtual processor's * home node. */ -static long hcall_vphn(unsigned long cpu, __be32 *associativity) -{ - long rc; - long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; - u64 flags = 1; - int hwcpu = get_hard_smp_processor_id(cpu); - - rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); - vphn_unpack_associativity(retbuf, associativity); - - return rc; -} - static long vphn_get_associativity(unsigned long cpu, __be32 *associativity) { long rc; - rc = hcall_vphn(cpu, associativity); + rc = hcall_vphn(get_hard_smp_processor_id(cpu), + VPHN_FLAG_VCPU, associativity); switch (rc) { case H_FUNCTION: diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index fc10c0c24f51..e3759b69f81b 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -336,10 +336,11 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, if (pgd_none(pgd)) return NULL; - if (pgd_huge(pgd)) { + if (pgd_is_leaf(pgd)) { ret_pte = (pte_t *)pgdp; goto out; } + if (is_hugepd(__hugepd(pgd_val(pgd)))) { hpdp = (hugepd_t *)&pgd; goto out_huge; @@ -357,14 +358,16 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, if (pud_none(pud)) return NULL; - if (pud_huge(pud)) { + if (pud_is_leaf(pud)) { ret_pte = (pte_t *)pudp; goto out; } + if (is_hugepd(__hugepd(pud_val(pud)))) { hpdp = (hugepd_t *)&pud; goto out_huge; } + pdshift = PMD_SHIFT; pmdp = pmd_offset(&pud, ea); pmd = READ_ONCE(*pmdp); @@ -393,15 +396,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, ret_pte = (pte_t *)pmdp; goto out; } - /* - * pmd_large check below will handle the swap pmd pte - * we need to do both the check because they are config - * dependent. 
- */ - if (pmd_huge(pmd) || pmd_large(pmd)) { + + if (pmd_is_leaf(pmd)) { ret_pte = (pte_t *)pmdp; goto out; } + if (is_hugepd(__hugepd(pmd_val(pmd)))) { hpdp = (hugepd_t *)&pmd; goto out_huge; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index d53188dee18f..35cb96cfc258 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -360,7 +360,7 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_stext) + 1) + if (v_block_mapped((unsigned long)_stext + 1)) mmu_mark_initmem_nx(); else change_page_attr(page, numpages, PAGE_KERNEL); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 12d5e083942d..9ad59b733984 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -103,14 +103,30 @@ unsigned long ioremap_bot; unsigned long ioremap_bot = IOREMAP_BASE; #endif +int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) +{ + unsigned long i; + + for (i = 0; i < size; i += PAGE_SIZE) { + int err = map_kernel_page(ea + i, pa + i, prot); + if (err) { + if (slab_is_available()) + unmap_kernel_range(ea, size); + else + WARN_ON_ONCE(1); /* Should clean up */ + return err; + } + } + + return 0; +} + /** * __ioremap_at - Low level function to establish the page tables * for an IO mapping */ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot) { - unsigned long i; - /* We don't support the 4K PFN hack with ioremap */ if (pgprot_val(prot) & H_PAGE_4K_PFN) return NULL; @@ -124,9 +140,8 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_ WARN_ON(((unsigned long)ea) & ~PAGE_MASK); WARN_ON(size & ~PAGE_MASK); - for (i = 0; i < size; i += PAGE_SIZE) - if (map_kernel_page((unsigned long)ea + i, pa + i, prot)) - return NULL; + if (ioremap_range((unsigned long)ea, pa, size, prot, NUMA_NO_NODE)) + return NULL; return (void __iomem *)ea; } @@ -177,8 +192,6 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size, area->phys_addr = paligned; ret = __ioremap_at(paligned, area->addr, size, prot); - if (!ret) - vunmap(area->addr); } else { ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot); if (ret) @@ -291,16 +304,20 @@ EXPORT_SYMBOL(__iounmap_at); /* 4 level page table */ struct page *pgd_page(pgd_t pgd) { - if (pgd_huge(pgd)) + if (pgd_is_leaf(pgd)) { + VM_WARN_ON(!pgd_huge(pgd)); return pte_page(pgd_pte(pgd)); + } return virt_to_page(pgd_page_vaddr(pgd)); } #endif struct page *pud_page(pud_t pud) { - if (pud_huge(pud)) + if (pud_is_leaf(pud)) { + VM_WARN_ON(!pud_huge(pud)); return pte_page(pud_pte(pud)); + } return virt_to_page(pud_page_vaddr(pud)); } @@ -310,8 +327,10 @@ struct page *pud_page(pud_t pud) */ struct page *pmd_page(pmd_t pmd) { - if (pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd)) + if (pmd_is_leaf(pmd)) { + VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); return pte_page(pmd_pte(pmd)); + } return virt_to_page(pmd_page_vaddr(pmd)); } diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 39bf1e2cba13..6a88a9f585d4 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -273,7 +273,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { addr = start + i * PMD_SIZE; - if (!pmd_none(*pmd) && !pmd_huge(*pmd)) + if (!pmd_none(*pmd) && 
!pmd_is_leaf(*pmd)) /* pmd exists */ walk_pte(st, pmd, addr); else @@ -289,7 +289,7 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) for (i = 0; i < PTRS_PER_PUD; i++, pud++) { addr = start + i * PUD_SIZE; - if (!pud_none(*pud) && !pud_huge(*pud)) + if (!pud_none(*pud) && !pud_is_leaf(*pud)) /* pud exists */ walk_pmd(st, pud, addr); else @@ -310,7 +310,7 @@ static void walk_pagetables(struct pg_state *st) * the hash pagetable. */ for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) { - if (!pgd_none(*pgd) && !pgd_huge(*pgd)) + if (!pgd_none(*pgd) && !pgd_is_leaf(*pgd)) /* pgd exists */ walk_pud(st, pgd, addr); else diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index c2ee6041f02c..02a59946a78a 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -500,6 +500,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ /* slw clears top 32 bits */ PPC_SLW(dst_reg, dst_reg, src_reg); + /* skip zero extension move, but set address map. */ + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ PPC_SLD(dst_reg, dst_reg, src_reg); @@ -507,6 +510,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ /* with imm 0, we still need to clear top 32 bits */ PPC_SLWI(dst_reg, dst_reg, imm); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ if (imm != 0) @@ -514,12 +519,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ PPC_SRW(dst_reg, dst_reg, src_reg); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ PPC_SRD(dst_reg, dst_reg, src_reg); break; case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ PPC_SRWI(dst_reg, dst_reg, imm); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ if (imm != 0) @@ -544,6 +553,11 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, */ case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + if (imm == 1) { + /* special mov32 for zext */ + PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); + break; + } PPC_MR(dst_reg, src_reg); goto bpf_alu32_trunc; case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ @@ -551,11 +565,13 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_LI32(dst_reg, imm); if (imm < 0) goto bpf_alu32_trunc; + else if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; bpf_alu32_trunc: /* Truncate to 32-bits */ - if (BPF_CLASS(code) == BPF_ALU) + if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext) PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); break; @@ -614,10 +630,13 @@ emit_clear: case 16: /* zero-extend 16 bits into 64 bits */ PPC_RLDICL(dst_reg, dst_reg, 0, 48); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; case 32: - /* zero-extend 32 bits into 64 bits */ - PPC_RLDICL(dst_reg, dst_reg, 0, 32); + if (!fp->aux->verifier_zext) + /* zero-extend 32 bits into 64 bits */ + PPC_RLDICL(dst_reg, dst_reg, 0, 32); break; case 64: /* nop */ @@ -694,14 +713,20 @@ emit_clear: /* dst = *(u8 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | 
BPF_B: PPC_LBZ(dst_reg, src_reg, off); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; /* dst = *(u16 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_H: PPC_LHZ(dst_reg, src_reg, off); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; /* dst = *(u32 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_W: PPC_LWZ(dst_reg, src_reg, off); + if (insn_is_zext(&insn[i + 1])) + addrs[++i] = ctx->idx * 4; break; /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_DW: @@ -1042,6 +1067,11 @@ struct powerpc64_jit_data { struct codegen_context ctx; }; +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { u32 proglen; diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index faad5b315f49..573e0b309c0c 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -567,7 +567,7 @@ static int event_uniq_add(struct rb_root *root, const char *name, int nl, struct event_uniq *it; int result; - it = container_of(*new, struct event_uniq, node); + it = rb_entry(*new, struct event_uniq, node); result = ev_uniq_ord(name, nl, domain, it->name, it->nl, it->domain); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 3bdfc1e32096..dea243185ea4 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -362,7 +362,14 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) */ nid = cpu_to_node(cpu); l_cpumask = cpumask_of_node(nid); - target = cpumask_any_but(l_cpumask, cpu); + target = cpumask_last(l_cpumask); + + /* + * If this(target) is the last cpu in the cpumask for this chip, + * check for any possible online cpu in the chip. + */ + if (unlikely(target == cpu)) + target = cpumask_any_but(l_cpumask, cpu); /* * Update the cpumask with the target cpu and @@ -667,7 +674,10 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) return 0; /* Find any online cpu in that core except the current "cpu" */ - ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu); + ncpu = cpumask_last(cpu_sibling_mask(cpu)); + + if (unlikely(ncpu == cpu)) + ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu); if (ncpu >= 0 && ncpu < nr_cpu_ids) { cpumask_set_cpu(ncpu, &core_imc_cpumask); diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig index ad2bb1408b4c..6da813b65b42 100644 --- a/arch/powerpc/platforms/40x/Kconfig +++ b/arch/powerpc/platforms/40x/Kconfig @@ -16,12 +16,12 @@ config EP405 This option enables support for the EP405/EP405PC boards. config HOTFOOT - bool "Hotfoot" + bool "Hotfoot" depends on 40x select PPC40x_SIMPLE select FORCE_PCI - help - This option enables support for the ESTEEM 195E Hotfoot board. + help + This option enables support for the ESTEEM 195E Hotfoot board. config KILAUEA bool "Kilauea" @@ -80,7 +80,6 @@ config OBS600 help This option enables support for PlatHome OpenBlockS 600 server - config PPC40x_SIMPLE bool "Simple PowerPC 40x board support" depends on 40x diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 35be81fd2dc2..b369ed4e3675 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -40,12 +40,12 @@ config EBONY This option enables support for the IBM PPC440GP evaluation board. config SAM440EP - bool "Sam440ep" + bool "Sam440ep" depends on 44x - select 440EP - select FORCE_PCI - help - This option enables support for the ACube Sam440ep board. 
+ select 440EP + select FORCE_PCI + help + This option enables support for the ACube Sam440ep board. config SEQUOIA bool "Sequoia" diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c index 31f12ad37a98..36fb66ce54cf 100644 --- a/arch/powerpc/platforms/4xx/uic.c +++ b/arch/powerpc/platforms/4xx/uic.c @@ -154,6 +154,7 @@ static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type) mtdcr(uic->dcrbase + UIC_PR, pr); mtdcr(uic->dcrbase + UIC_TR, tr); + mtdcr(uic->dcrbase + UIC_SR, ~mask); raw_spin_unlock_irqrestore(&uic->lock, flags); diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index d1af0ee2f8c8..fa3d29dcb57e 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -147,10 +147,10 @@ config SOCRATES This option enables support for the Socrates board. config KSI8560 - bool "Emerson KSI8560" - select DEFAULT_UIMAGE - help - This option enables support for the Emerson KSI8560 board + bool "Emerson KSI8560" + select DEFAULT_UIMAGE + help + This option enables support for the Emerson KSI8560 board config XES_MPC85xx bool "X-ES single-board computer" diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig index 0a610114bc38..07a9d60c618a 100644 --- a/arch/powerpc/platforms/86xx/Kconfig +++ b/arch/powerpc/platforms/86xx/Kconfig @@ -62,9 +62,9 @@ config GEF_SBC610 This option enables support for the GE SBC610. config MVME7100 - bool "Artesyn MVME7100" - help - This option enables support for the Emerson/Artesyn MVME7100 board. + bool "Artesyn MVME7100" + help + This option enables support for the Emerson/Artesyn MVME7100 board. endif diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index d408162d5af4..e0fe670f06f6 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -157,6 +157,13 @@ config I2C_SPI_SMC1_UCODE_PATCH help Help not implemented yet, coming soon. +config SMC_UCODE_PATCH + bool "SMC relocation patch" + help + This microcode relocates SMC1 and SMC2 parameter RAMs at + offset 0x1ec0 and 0x1fc0 to allow extended parameter RAM + for SCC3 and SCC4. + endchoice config UCODE_PATCH diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile index 708ab099e886..27a7c6f828e0 100644 --- a/arch/powerpc/platforms/8xx/Makefile +++ b/arch/powerpc/platforms/8xx/Makefile @@ -3,6 +3,8 @@ # Makefile for the PowerPC 8xx linux kernel. # obj-y += m8xx_setup.o machine_check.o pic.o +obj-$(CONFIG_CPM1) += cpm1.o +obj-$(CONFIG_UCODE_PATCH) += micropatch.o obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o obj-$(CONFIG_MPC86XADS) += mpc86xads_setup.o obj-$(CONFIG_PPC_EP88XC) += ep88xc.o diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 4f8dcf124828..0f65c51271db 100644 --- a/arch/powerpc/sysdev/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -88,7 +88,8 @@ int cpm_get_irq(void) { int cpm_vec; - /* Get the vector by setting the ACK bit and then reading + /* + * Get the vector by setting the ACK bit and then reading * the register. */ out_be16(&cpic_reg->cpic_civr, 1); @@ -108,7 +109,8 @@ static int cpm_pic_host_map(struct irq_domain *h, unsigned int virq, return 0; } -/* The CPM can generate the error interrupt when there is a race condition +/* + * The CPM can generate the error interrupt when there is a race condition * between generating and masking interrupts. All we have to do is ACK it * and return. 
This is a no-op function so we don't need any special * tests in the interrupt handler. @@ -208,12 +210,10 @@ void __init cpm_reset(void) cpmp = &mpc8xx_immr->im_cpm; #ifndef CONFIG_PPC_EARLY_DEBUG_CPM - /* Perform a reset. - */ + /* Perform a reset. */ out_be16(&cpmp->cp_cpcr, CPM_CR_RST | CPM_CR_FLG); - /* Wait for it. - */ + /* Wait for it. */ while (in_be16(&cpmp->cp_cpcr) & CPM_CR_FLG); #endif @@ -221,7 +221,8 @@ void __init cpm_reset(void) cpm_load_patch(cpmp); #endif - /* Set SDMA Bus Request priority 5. + /* + * Set SDMA Bus Request priority 5. * On 860T, this also enables FEC priority 6. I am not sure * this is what we really want for some applications, but the * manual recommends it. @@ -263,7 +264,8 @@ out: } EXPORT_SYMBOL(cpm_command); -/* Set a baud rate generator. This needs lots of work. There are +/* + * Set a baud rate generator. This needs lots of work. There are * four BRGs, any of which can be wired to any channel. * The internal baud rate clock is the system clock divided by 16. * This assumes the baudrate is 16x oversampled by the uart. @@ -277,11 +279,11 @@ cpm_setbrg(uint brg, uint rate) { u32 __iomem *bp; - /* This is good enough to get SMCs running..... - */ + /* This is good enough to get SMCs running..... */ bp = &cpmp->cp_brgc1; bp += brg; - /* The BRG has a 12-bit counter. For really slow baud rates (or + /* + * The BRG has a 12-bit counter. For really slow baud rates (or * really fast processors), we may have to further divide by 16. */ if (((BRG_UART_CLK / rate) - 1) < 4096) diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c new file mode 100644 index 000000000000..c80bd7afd6c5 --- /dev/null +++ b/arch/powerpc/platforms/8xx/micropatch.c @@ -0,0 +1,378 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Microcode patches for the CPM as supplied by Motorola. + * This is the one for IIC/SPI. There is a newer one that + * also relocates SMC2, but this would require additional changes + * to uart.c, so I am holding off on that for a moment. + */ +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/8xx_immap.h> +#include <asm/cpm.h> +#include <asm/cpm1.h> + +struct patch_params { + ushort rccr; + ushort cpmcr1; + ushort cpmcr2; + ushort cpmcr3; + ushort cpmcr4; +}; + +/* + * I2C/SPI relocation patch arrays. 
+ */ + +#ifdef CONFIG_I2C_SPI_UCODE_PATCH + +static char patch_name[] __initdata = "I2C/SPI"; + +static struct patch_params patch_params __initdata = { + 1, 0x802a, 0x8028, 0x802e, 0x802c, +}; + +static uint patch_2000[] __initdata = { + 0x7FFFEFD9, 0x3FFD0000, 0x7FFB49F7, 0x7FF90000, + 0x5FEFADF7, 0x5F89ADF7, 0x5FEFAFF7, 0x5F89AFF7, + 0x3A9CFBC8, 0xE7C0EDF0, 0x77C1E1BB, 0xF4DC7F1D, + 0xABAD932F, 0x4E08FDCF, 0x6E0FAFF8, 0x7CCF76CF, + 0xFD1FF9CF, 0xABF88DC6, 0xAB5679F7, 0xB0937383, + 0xDFCE79F7, 0xB091E6BB, 0xE5BBE74F, 0xB3FA6F0F, + 0x6FFB76CE, 0xEE0DF9CF, 0x2BFBEFEF, 0xCFEEF9CF, + 0x76CEAD24, 0x90B2DF9A, 0x7FDDD0BF, 0x4BF847FD, + 0x7CCF76CE, 0xCFEF7E1F, 0x7F1D7DFD, 0xF0B6EF71, + 0x7FC177C1, 0xFBC86079, 0xE722FBC8, 0x5FFFDFFF, + 0x5FB2FFFB, 0xFBC8F3C8, 0x94A67F01, 0x7F1D5F39, + 0xAFE85F5E, 0xFFDFDF96, 0xCB9FAF7D, 0x5FC1AFED, + 0x8C1C5FC1, 0xAFDD5FC3, 0xDF9A7EFD, 0xB0B25FB2, + 0xFFFEABAD, 0x5FB2FFFE, 0x5FCE600B, 0xE6BB600B, + 0x5FCEDFC6, 0x27FBEFDF, 0x5FC8CFDE, 0x3A9CE7C0, + 0xEDF0F3C8, 0x7F0154CD, 0x7F1D2D3D, 0x363A7570, + 0x7E0AF1CE, 0x37EF2E68, 0x7FEE10EC, 0xADF8EFDE, + 0xCFEAE52F, 0x7D0FE12B, 0xF1CE5F65, 0x7E0A4DF8, + 0xCFEA5F72, 0x7D0BEFEE, 0xCFEA5F74, 0xE522EFDE, + 0x5F74CFDA, 0x0B627385, 0xDF627E0A, 0x30D8145B, + 0xBFFFF3C8, 0x5FFFDFFF, 0xA7F85F5E, 0xBFFE7F7D, + 0x10D31450, 0x5F36BFFF, 0xAF785F5E, 0xBFFDA7F8, + 0x5F36BFFE, 0x77FD30C0, 0x4E08FDCF, 0xE5FF6E0F, + 0xAFF87E1F, 0x7E0FFD1F, 0xF1CF5F1B, 0xABF80D5E, + 0x5F5EFFEF, 0x79F730A2, 0xAFDD5F34, 0x47F85F34, + 0xAFED7FDD, 0x50B24978, 0x47FD7F1D, 0x7DFD70AD, + 0xEF717EC1, 0x6BA47F01, 0x2D267EFD, 0x30DE5F5E, + 0xFFFD5F5E, 0xFFEF5F5E, 0xFFDF0CA0, 0xAFED0A9E, + 0xAFDD0C3A, 0x5F3AAFBD, 0x7FBDB082, 0x5F8247F8 +}; + +static uint patch_2f00[] __initdata = { + 0x3E303430, 0x34343737, 0xABF7BF9B, 0x994B4FBD, + 0xBD599493, 0x349FFF37, 0xFB9B177D, 0xD9936956, + 0xBBFDD697, 0xBDD2FD11, 0x31DB9BB3, 0x63139637, + 0x93733693, 0x193137F7, 0x331737AF, 0x7BB9B999, + 0xBB197957, 0x7FDFD3D5, 0x73B773F7, 0x37933B99, + 0x1D115316, 0x99315315, 0x31694BF4, 0xFBDBD359, + 0x31497353, 0x76956D69, 0x7B9D9693, 0x13131979, + 0x79376935 +}; + +static uint patch_2e00[] __initdata = {}; +#endif + +/* + * I2C/SPI/SMC1 relocation patch arrays. 
+ */ + +#ifdef CONFIG_I2C_SPI_SMC1_UCODE_PATCH + +static char patch_name[] __initdata = "I2C/SPI/SMC1"; + +static struct patch_params patch_params __initdata = { + 3, 0x8080, 0x808a, 0x8028, 0x802a, +}; + +static uint patch_2000[] __initdata = { + 0x3fff0000, 0x3ffd0000, 0x3ffb0000, 0x3ff90000, + 0x5f13eff8, 0x5eb5eff8, 0x5f88adf7, 0x5fefadf7, + 0x3a9cfbc8, 0x77cae1bb, 0xf4de7fad, 0xabae9330, + 0x4e08fdcf, 0x6e0faff8, 0x7ccf76cf, 0xfdaff9cf, + 0xabf88dc8, 0xab5879f7, 0xb0925d8d, 0xdfd079f7, + 0xb090e6bb, 0xe5bbe74f, 0x9e046f0f, 0x6ffb76ce, + 0xee0cf9cf, 0x2bfbefef, 0xcfeef9cf, 0x76cead23, + 0x90b3df99, 0x7fddd0c1, 0x4bf847fd, 0x7ccf76ce, + 0xcfef77ca, 0x7eaf7fad, 0x7dfdf0b7, 0xef7a7fca, + 0x77cafbc8, 0x6079e722, 0xfbc85fff, 0xdfff5fb3, + 0xfffbfbc8, 0xf3c894a5, 0xe7c9edf9, 0x7f9a7fad, + 0x5f36afe8, 0x5f5bffdf, 0xdf95cb9e, 0xaf7d5fc3, + 0xafed8c1b, 0x5fc3afdd, 0x5fc5df99, 0x7efdb0b3, + 0x5fb3fffe, 0xabae5fb3, 0xfffe5fd0, 0x600be6bb, + 0x600b5fd0, 0xdfc827fb, 0xefdf5fca, 0xcfde3a9c, + 0xe7c9edf9, 0xf3c87f9e, 0x54ca7fed, 0x2d3a3637, + 0x756f7e9a, 0xf1ce37ef, 0x2e677fee, 0x10ebadf8, + 0xefdecfea, 0xe52f7d9f, 0xe12bf1ce, 0x5f647e9a, + 0x4df8cfea, 0x5f717d9b, 0xefeecfea, 0x5f73e522, + 0xefde5f73, 0xcfda0b61, 0x5d8fdf61, 0xe7c9edf9, + 0x7e9a30d5, 0x1458bfff, 0xf3c85fff, 0xdfffa7f8, + 0x5f5bbffe, 0x7f7d10d0, 0x144d5f33, 0xbfffaf78, + 0x5f5bbffd, 0xa7f85f33, 0xbffe77fd, 0x30bd4e08, + 0xfdcfe5ff, 0x6e0faff8, 0x7eef7e9f, 0xfdeff1cf, + 0x5f17abf8, 0x0d5b5f5b, 0xffef79f7, 0x309eafdd, + 0x5f3147f8, 0x5f31afed, 0x7fdd50af, 0x497847fd, + 0x7f9e7fed, 0x7dfd70a9, 0xef7e7ece, 0x6ba07f9e, + 0x2d227efd, 0x30db5f5b, 0xfffd5f5b, 0xffef5f5b, + 0xffdf0c9c, 0xafed0a9a, 0xafdd0c37, 0x5f37afbd, + 0x7fbdb081, 0x5f8147f8, 0x3a11e710, 0xedf0ccdd, + 0xf3186d0a, 0x7f0e5f06, 0x7fedbb38, 0x3afe7468, + 0x7fedf4fc, 0x8ffbb951, 0xb85f77fd, 0xb0df5ddd, + 0xdefe7fed, 0x90e1e74d, 0x6f0dcbf7, 0xe7decfed, + 0xcb74cfed, 0xcfeddf6d, 0x91714f74, 0x5dd2deef, + 0x9e04e7df, 0xefbb6ffb, 0xe7ef7f0e, 0x9e097fed, + 0xebdbeffa, 0xeb54affb, 0x7fea90d7, 0x7e0cf0c3, + 0xbffff318, 0x5fffdfff, 0xac59efea, 0x7fce1ee5, + 0xe2ff5ee1, 0xaffbe2ff, 0x5ee3affb, 0xf9cc7d0f, + 0xaef8770f, 0x7d0fb0c6, 0xeffbbfff, 0xcfef5ede, + 0x7d0fbfff, 0x5ede4cf8, 0x7fddd0bf, 0x49f847fd, + 0x7efdf0bb, 0x7fedfffd, 0x7dfdf0b7, 0xef7e7e1e, + 0x5ede7f0e, 0x3a11e710, 0xedf0ccab, 0xfb18ad2e, + 0x1ea9bbb8, 0x74283b7e, 0x73c2e4bb, 0x2ada4fb8, + 0xdc21e4bb, 0xb2a1ffbf, 0x5e2c43f8, 0xfc87e1bb, + 0xe74ffd91, 0x6f0f4fe8, 0xc7ba32e2, 0xf396efeb, + 0x600b4f78, 0xe5bb760b, 0x53acaef8, 0x4ef88b0e, + 0xcfef9e09, 0xabf8751f, 0xefef5bac, 0x741f4fe8, + 0x751e760d, 0x7fdbf081, 0x741cafce, 0xefcc7fce, + 0x751e70ac, 0x741ce7bb, 0x3372cfed, 0xafdbefeb, + 0xe5bb760b, 0x53f2aef8, 0xafe8e7eb, 0x4bf8771e, + 0x7e247fed, 0x4fcbe2cc, 0x7fbc30a9, 0x7b0f7a0f, + 0x34d577fd, 0x308b5db7, 0xde553e5f, 0xaf78741f, + 0x741f30f0, 0xcfef5e2c, 0x741f3eac, 0xafb8771e, + 0x5e677fed, 0x0bd3e2cc, 0x741ccfec, 0xe5ca53cd, + 0x6fcb4f74, 0x5dadde4b, 0x2ab63d38, 0x4bb3de30, + 0x751f741c, 0x6c42effa, 0xefea7fce, 0x6ffc30be, + 0xefec3fca, 0x30b3de2e, 0xadf85d9e, 0xaf7daefd, + 0x5d9ede2e, 0x5d9eafdd, 0x761f10ac, 0x1da07efd, + 0x30adfffe, 0x4908fb18, 0x5fffdfff, 0xafbb709b, + 0x4ef85e67, 0xadf814ad, 0x7a0f70ad, 0xcfef50ad, + 0x7a0fde30, 0x5da0afed, 0x3c12780f, 0xefef780f, + 0xefef790f, 0xa7f85e0f, 0xffef790f, 0xefef790f, + 0x14adde2e, 0x5d9eadfd, 0x5e2dfffb, 0xe79addfd, + 0xeff96079, 0x607ae79a, 0xddfceff9, 0x60795dff, + 0x607acfef, 0xefefefdf, 0xefbfef7f, 0xeeffedff, + 0xebffe7ff, 0xafefafdf, 0xafbfaf7f, 0xaeffadff, + 
0xabffa7ff, 0x6fef6fdf, 0x6fbf6f7f, 0x6eff6dff, + 0x6bff67ff, 0x2fef2fdf, 0x2fbf2f7f, 0x2eff2dff, + 0x2bff27ff, 0x4e08fd1f, 0xe5ff6e0f, 0xaff87eef, + 0x7e0ffdef, 0xf11f6079, 0xabf8f542, 0x7e0af11c, + 0x37cfae3a, 0x7fec90be, 0xadf8efdc, 0xcfeae52f, + 0x7d0fe12b, 0xf11c6079, 0x7e0a4df8, 0xcfea5dc4, + 0x7d0befec, 0xcfea5dc6, 0xe522efdc, 0x5dc6cfda, + 0x4e08fd1f, 0x6e0faff8, 0x7c1f761f, 0xfdeff91f, + 0x6079abf8, 0x761cee24, 0xf91f2bfb, 0xefefcfec, + 0xf91f6079, 0x761c27fb, 0xefdf5da7, 0xcfdc7fdd, + 0xd09c4bf8, 0x47fd7c1f, 0x761ccfcf, 0x7eef7fed, + 0x7dfdf093, 0xef7e7f1e, 0x771efb18, 0x6079e722, + 0xe6bbe5bb, 0xae0ae5bb, 0x600bae85, 0xe2bbe2bb, + 0xe2bbe2bb, 0xaf02e2bb, 0xe2bb2ff9, 0x6079e2bb +}; + +static uint patch_2f00[] __initdata = { + 0x30303030, 0x3e3e3434, 0xabbf9b99, 0x4b4fbdbd, + 0x59949334, 0x9fff37fb, 0x9b177dd9, 0x936956bb, + 0xfbdd697b, 0xdd2fd113, 0x1db9f7bb, 0x36313963, + 0x79373369, 0x3193137f, 0x7331737a, 0xf7bb9b99, + 0x9bb19795, 0x77fdfd3d, 0x573b773f, 0x737933f7, + 0xb991d115, 0x31699315, 0x31531694, 0xbf4fbdbd, + 0x35931497, 0x35376956, 0xbd697b9d, 0x96931313, + 0x19797937, 0x6935af78, 0xb9b3baa3, 0xb8788683, + 0x368f78f7, 0x87778733, 0x3ffffb3b, 0x8e8f78b8, + 0x1d118e13, 0xf3ff3f8b, 0x6bd8e173, 0xd1366856, + 0x68d1687b, 0x3daf78b8, 0x3a3a3f87, 0x8f81378f, + 0xf876f887, 0x77fd8778, 0x737de8d6, 0xbbf8bfff, + 0xd8df87f7, 0xfd876f7b, 0x8bfff8bd, 0x8683387d, + 0xb873d87b, 0x3b8fd7f8, 0xf7338883, 0xbb8ee1f8, + 0xef837377, 0x3337b836, 0x817d11f8, 0x7378b878, + 0xd3368b7d, 0xed731b7d, 0x833731f3, 0xf22f3f23 +}; + +static uint patch_2e00[] __initdata = { + 0x27eeeeee, 0xeeeeeeee, 0xeeeeeeee, 0xeeeeeeee, + 0xee4bf4fb, 0xdbd259bb, 0x1979577f, 0xdfd2d573, + 0xb773f737, 0x4b4fbdbd, 0x25b9b177, 0xd2d17376, + 0x956bbfdd, 0x697bdd2f, 0xff9f79ff, 0xff9ff22f +}; +#endif + +/* + * USB SOF patch arrays. + */ + +#ifdef CONFIG_USB_SOF_UCODE_PATCH + +static char patch_name[] __initdata = "USB SOF"; + +static struct patch_params patch_params __initdata = { + 9, +}; + +static uint patch_2000[] __initdata = { + 0x7fff0000, 0x7ffd0000, 0x7ffb0000, 0x49f7ba5b, + 0xba383ffb, 0xf9b8b46d, 0xe5ab4e07, 0xaf77bffe, + 0x3f7bbf79, 0xba5bba38, 0xe7676076, 0x60750000 +}; + +static uint patch_2f00[] __initdata = { + 0x3030304c, 0xcab9e441, 0xa1aaf220 +}; + +static uint patch_2e00[] __initdata = {}; +#endif + +/* + * SMC relocation patch arrays. 
+ */ + +#ifdef CONFIG_SMC_UCODE_PATCH + +static char patch_name[] __initdata = "SMC"; + +static struct patch_params patch_params __initdata = { + 2, 0x8080, 0x8088, +}; + +static uint patch_2000[] __initdata = { + 0x3fff0000, 0x3ffd0000, 0x3ffb0000, 0x3ff90000, + 0x5fefeff8, 0x5f91eff8, 0x3ff30000, 0x3ff10000, + 0x3a11e710, 0xedf0ccb9, 0xf318ed66, 0x7f0e5fe2, + 0x7fedbb38, 0x3afe7468, 0x7fedf4d8, 0x8ffbb92d, + 0xb83b77fd, 0xb0bb5eb9, 0xdfda7fed, 0x90bde74d, + 0x6f0dcbd3, 0xe7decfed, 0xcb50cfed, 0xcfeddf6d, + 0x914d4f74, 0x5eaedfcb, 0x9ee0e7df, 0xefbb6ffb, + 0xe7ef7f0e, 0x9ee57fed, 0xebb7effa, 0xeb30affb, + 0x7fea90b3, 0x7e0cf09f, 0xbffff318, 0x5fffdfff, + 0xac35efea, 0x7fce1fc1, 0xe2ff5fbd, 0xaffbe2ff, + 0x5fbfaffb, 0xf9a87d0f, 0xaef8770f, 0x7d0fb0a2, + 0xeffbbfff, 0xcfef5fba, 0x7d0fbfff, 0x5fba4cf8, + 0x7fddd09b, 0x49f847fd, 0x7efdf097, 0x7fedfffd, + 0x7dfdf093, 0xef7e7e1e, 0x5fba7f0e, 0x3a11e710, + 0xedf0cc87, 0xfb18ad0a, 0x1f85bbb8, 0x74283b7e, + 0x7375e4bb, 0x2ab64fb8, 0x5c7de4bb, 0x32fdffbf, + 0x5f0843f8, 0x7ce3e1bb, 0xe74f7ded, 0x6f0f4fe8, + 0xc7ba32be, 0x73f2efeb, 0x600b4f78, 0xe5bb760b, + 0x5388aef8, 0x4ef80b6a, 0xcfef9ee5, 0xabf8751f, + 0xefef5b88, 0x741f4fe8, 0x751e760d, 0x7fdb70dd, + 0x741cafce, 0xefcc7fce, 0x751e7088, 0x741ce7bb, + 0x334ecfed, 0xafdbefeb, 0xe5bb760b, 0x53ceaef8, + 0xafe8e7eb, 0x4bf8771e, 0x7e007fed, 0x4fcbe2cc, + 0x7fbc3085, 0x7b0f7a0f, 0x34b177fd, 0xb0e75e93, + 0xdf313e3b, 0xaf78741f, 0x741f30cc, 0xcfef5f08, + 0x741f3e88, 0xafb8771e, 0x5f437fed, 0x0bafe2cc, + 0x741ccfec, 0xe5ca53a9, 0x6fcb4f74, 0x5e89df27, + 0x2a923d14, 0x4b8fdf0c, 0x751f741c, 0x6c1eeffa, + 0xefea7fce, 0x6ffc309a, 0xefec3fca, 0x308fdf0a, + 0xadf85e7a, 0xaf7daefd, 0x5e7adf0a, 0x5e7aafdd, + 0x761f1088, 0x1e7c7efd, 0x3089fffe, 0x4908fb18, + 0x5fffdfff, 0xafbbf0f7, 0x4ef85f43, 0xadf81489, + 0x7a0f7089, 0xcfef5089, 0x7a0fdf0c, 0x5e7cafed, + 0xbc6e780f, 0xefef780f, 0xefef790f, 0xa7f85eeb, + 0xffef790f, 0xefef790f, 0x1489df0a, 0x5e7aadfd, + 0x5f09fffb, 0xe79aded9, 0xeff96079, 0x607ae79a, + 0xded8eff9, 0x60795edb, 0x607acfef, 0xefefefdf, + 0xefbfef7f, 0xeeffedff, 0xebffe7ff, 0xafefafdf, + 0xafbfaf7f, 0xaeffadff, 0xabffa7ff, 0x6fef6fdf, + 0x6fbf6f7f, 0x6eff6dff, 0x6bff67ff, 0x2fef2fdf, + 0x2fbf2f7f, 0x2eff2dff, 0x2bff27ff, 0x4e08fd1f, + 0xe5ff6e0f, 0xaff87eef, 0x7e0ffdef, 0xf11f6079, + 0xabf8f51e, 0x7e0af11c, 0x37cfae16, 0x7fec909a, + 0xadf8efdc, 0xcfeae52f, 0x7d0fe12b, 0xf11c6079, + 0x7e0a4df8, 0xcfea5ea0, 0x7d0befec, 0xcfea5ea2, + 0xe522efdc, 0x5ea2cfda, 0x4e08fd1f, 0x6e0faff8, + 0x7c1f761f, 0xfdeff91f, 0x6079abf8, 0x761cee00, + 0xf91f2bfb, 0xefefcfec, 0xf91f6079, 0x761c27fb, + 0xefdf5e83, 0xcfdc7fdd, 0x50f84bf8, 0x47fd7c1f, + 0x761ccfcf, 0x7eef7fed, 0x7dfd70ef, 0xef7e7f1e, + 0x771efb18, 0x6079e722, 0xe6bbe5bb, 0x2e66e5bb, + 0x600b2ee1, 0xe2bbe2bb, 0xe2bbe2bb, 0x2f5ee2bb, + 0xe2bb2ff9, 0x6079e2bb, +}; + +static uint patch_2f00[] __initdata = { + 0x30303030, 0x3e3e3030, 0xaf79b9b3, 0xbaa3b979, + 0x9693369f, 0x79f79777, 0x97333fff, 0xfb3b9e9f, + 0x79b91d11, 0x9e13f3ff, 0x3f9b6bd9, 0xe173d136, + 0x695669d1, 0x697b3daf, 0x79b93a3a, 0x3f979f91, + 0x379ff976, 0xf99777fd, 0x9779737d, 0xe9d6bbf9, + 0xbfffd9df, 0x97f7fd97, 0x6f7b9bff, 0xf9bd9683, + 0x397db973, 0xd97b3b9f, 0xd7f9f733, 0x9993bb9e, + 0xe1f9ef93, 0x73773337, 0xb936917d, 0x11f87379, + 0xb979d336, 0x8b7ded73, 0x1b7d9337, 0x31f3f22f, + 0x3f2327ee, 0xeeeeeeee, 0xeeeeeeee, 0xeeeeeeee, + 0xeeeeee4b, 0xf4fbdbd2, 0x58bb1878, 0x577fdfd2, + 0xd573b773, 0xf7374b4f, 0xbdbd25b8, 0xb177d2d1, + 0x7376856b, 0xbfdd687b, 0xdd2fff8f, 0x78ffff8f, + 0xf22f0000, +}; + 
+static uint patch_2e00[] __initdata = {}; +#endif + +static void __init cpm_write_patch(cpm8xx_t *cp, int offset, uint *patch, int len) +{ + if (!len) + return; + memcpy_toio(cp->cp_dpmem + offset, patch, len); +} + +void __init cpm_load_patch(cpm8xx_t *cp) +{ + out_be16(&cp->cp_rccr, 0); + + cpm_write_patch(cp, 0, patch_2000, sizeof(patch_2000)); + cpm_write_patch(cp, 0xf00, patch_2f00, sizeof(patch_2f00)); + cpm_write_patch(cp, 0xe00, patch_2e00, sizeof(patch_2e00)); + + if (IS_ENABLED(CONFIG_I2C_SPI_UCODE_PATCH) || + IS_ENABLED(CONFIG_I2C_SPI_SMC1_UCODE_PATCH)) { + u16 rpbase = 0x500; + iic_t *iip; + struct spi_pram *spp; + + iip = (iic_t *)&cp->cp_dparam[PROFF_IIC]; + out_be16(&iip->iic_rpbase, rpbase); + + /* Put SPI above the IIC, also 32-byte aligned. */ + spp = (struct spi_pram *)&cp->cp_dparam[PROFF_SPI]; + out_be16(&spp->rpbase, (rpbase + sizeof(iic_t) + 31) & ~31); + + if (IS_ENABLED(CONFIG_I2C_SPI_SMC1_UCODE_PATCH)) { + smc_uart_t *smp; + + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1]; + out_be16(&smp->smc_rpbase, 0x1FC0); + } + } + + if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) { + smc_uart_t *smp; + + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1]; + out_be16(&smp->smc_rpbase, 0x1ec0); + smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2]; + out_be16(&smp->smc_rpbase, 0x1fc0); + } + + out_be16(&cp->cp_cpmcr1, patch_params.cpmcr1); + out_be16(&cp->cp_cpmcr2, patch_params.cpmcr2); + out_be16(&cp->cp_cpmcr3, patch_params.cpmcr3); + out_be16(&cp->cp_cpmcr4, patch_params.cpmcr4); + + out_be16(&cp->cp_rccr, patch_params.rccr); + + pr_info("%s microcode patch installed\n", patch_name); +} diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 2794235e9d3e..56a7c814160d 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -330,7 +330,7 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config PPC_RADIX_MMU bool "Radix MMU Support" - depends on PPC_BOOK3S_64 && HUGETLB_PAGE + depends on PPC_BOOK3S_64 select ARCH_HAS_GIGANTIC_PAGE select PPC_HAVE_KUEP select PPC_HAVE_KUAP diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 6dfd2cb1bce7..24adbe3c605c 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -31,22 +31,21 @@ static void spufs_handle_event(struct spu_context *ctx, switch (type) { case SPE_EVENT_INVALID_DMA: - force_sig_fault(SIGBUS, BUS_OBJERR, NULL, current); + force_sig_fault(SIGBUS, BUS_OBJERR, NULL); break; case SPE_EVENT_SPE_DATA_STORAGE: ctx->ops->restart_dma(ctx); - force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea, - current); + force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea); break; case SPE_EVENT_DMA_ALIGNMENT: /* DAR isn't set for an alignment fault :( */ - force_sig_fault(SIGBUS, BUS_ADRALN, NULL, current); + force_sig_fault(SIGBUS, BUS_ADRALN, NULL); break; case SPE_EVENT_SPE_ERROR: force_sig_fault( SIGILL, ILL_ILLOPC, (void __user *)(unsigned long) - ctx->ops->npc_read(ctx) - 4, current); + ctx->ops->npc_read(ctx) - 4); break; } } diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index d40253a18b1c..c0f950a3f4e1 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -446,7 +446,7 @@ static const struct file_operations spufs_cntl_fops = { .release = spufs_cntl_release, .read = simple_attr_read, .write = simple_attr_write, - .llseek = generic_file_llseek, + .llseek = no_llseek, 
.mmap = spufs_cntl_mmap, }; diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 07f82d7395ff..3f2380f40f99 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -443,7 +443,7 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event) else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) { - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); ret = -ERESTARTSYS; } diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index e56b553de27b..f18d5067cd0f 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -128,7 +128,7 @@ void __spu_update_sched_info(struct spu_context *ctx) * runqueue. The context will be rescheduled on the proper node * if it is timesliced or preempted. */ - cpumask_copy(&ctx->cpus_allowed, ¤t->cpus_allowed); + cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr); /* Save the current cpu id for spu interrupt routing. */ ctx->last_ran = raw_smp_processor_id(); diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig index 08d530a2a8b1..86ae210bee9a 100644 --- a/arch/powerpc/platforms/maple/Kconfig +++ b/arch/powerpc/platforms/maple/Kconfig @@ -14,5 +14,5 @@ config PPC_MAPLE select MMIO_NVRAM select ATA_NONSTANDARD if ATA help - This option enables support for the Maple 970FX Evaluation Board. + This option enables support for the Maple 970FX Evaluation Board. For more information, refer to <http://www.970eval.com> diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S index 6bbcbec97712..bd6085b470b7 100644 --- a/arch/powerpc/platforms/powermac/sleep.S +++ b/arch/powerpc/platforms/powermac/sleep.S @@ -33,10 +33,18 @@ #define SL_IBAT2 0x48 #define SL_DBAT3 0x50 #define SL_IBAT3 0x58 -#define SL_TB 0x60 -#define SL_R2 0x68 -#define SL_CR 0x6c -#define SL_R12 0x70 /* r12 to r31 */ +#define SL_DBAT4 0x60 +#define SL_IBAT4 0x68 +#define SL_DBAT5 0x70 +#define SL_IBAT5 0x78 +#define SL_DBAT6 0x80 +#define SL_IBAT6 0x88 +#define SL_DBAT7 0x90 +#define SL_IBAT7 0x98 +#define SL_TB 0xa0 +#define SL_R2 0xa8 +#define SL_CR 0xac +#define SL_R12 0xb0 /* r12 to r31 */ #define SL_SIZE (SL_R12 + 80) .section .text @@ -121,6 +129,41 @@ _GLOBAL(low_sleep_handler) mfibatl r4,3 stw r4,SL_IBAT3+4(r1) +BEGIN_MMU_FTR_SECTION + mfspr r4,SPRN_DBAT4U + stw r4,SL_DBAT4(r1) + mfspr r4,SPRN_DBAT4L + stw r4,SL_DBAT4+4(r1) + mfspr r4,SPRN_DBAT5U + stw r4,SL_DBAT5(r1) + mfspr r4,SPRN_DBAT5L + stw r4,SL_DBAT5+4(r1) + mfspr r4,SPRN_DBAT6U + stw r4,SL_DBAT6(r1) + mfspr r4,SPRN_DBAT6L + stw r4,SL_DBAT6+4(r1) + mfspr r4,SPRN_DBAT7U + stw r4,SL_DBAT7(r1) + mfspr r4,SPRN_DBAT7L + stw r4,SL_DBAT7+4(r1) + mfspr r4,SPRN_IBAT4U + stw r4,SL_IBAT4(r1) + mfspr r4,SPRN_IBAT4L + stw r4,SL_IBAT4+4(r1) + mfspr r4,SPRN_IBAT5U + stw r4,SL_IBAT5(r1) + mfspr r4,SPRN_IBAT5L + stw r4,SL_IBAT5+4(r1) + mfspr r4,SPRN_IBAT6U + stw r4,SL_IBAT6(r1) + mfspr r4,SPRN_IBAT6L + stw r4,SL_IBAT6+4(r1) + mfspr r4,SPRN_IBAT7U + stw r4,SL_IBAT7(r1) + mfspr r4,SPRN_IBAT7L + stw r4,SL_IBAT7+4(r1) +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + /* Backup various CPU config stuffs */ bl __save_cpu_setup @@ -321,22 +364,37 @@ grackle_wake_up: mtibatl 3,r4 BEGIN_MMU_FTR_SECTION - li r4,0 + lwz r4,SL_DBAT4(r1) mtspr SPRN_DBAT4U,r4 + lwz r4,SL_DBAT4+4(r1) mtspr SPRN_DBAT4L,r4 + lwz r4,SL_DBAT5(r1) mtspr SPRN_DBAT5U,r4 + lwz 
r4,SL_DBAT5+4(r1) mtspr SPRN_DBAT5L,r4 + lwz r4,SL_DBAT6(r1) mtspr SPRN_DBAT6U,r4 + lwz r4,SL_DBAT6+4(r1) mtspr SPRN_DBAT6L,r4 + lwz r4,SL_DBAT7(r1) mtspr SPRN_DBAT7U,r4 + lwz r4,SL_DBAT7+4(r1) mtspr SPRN_DBAT7L,r4 + lwz r4,SL_IBAT4(r1) mtspr SPRN_IBAT4U,r4 + lwz r4,SL_IBAT4+4(r1) mtspr SPRN_IBAT4L,r4 + lwz r4,SL_IBAT5(r1) mtspr SPRN_IBAT5U,r4 + lwz r4,SL_IBAT5+4(r1) mtspr SPRN_IBAT5L,r4 + lwz r4,SL_IBAT6(r1) mtspr SPRN_IBAT6U,r4 + lwz r4,SL_IBAT6+4(r1) mtspr SPRN_IBAT6L,r4 + lwz r4,SL_IBAT7(r1) mtspr SPRN_IBAT7U,r4 + lwz r4,SL_IBAT7+4(r1) mtspr SPRN_IBAT7L,r4 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 9ade4489f415..620a986209f5 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * The file intends to implement the platform dependent EEH operations on - * powernv platform. Actually, the powernv was created in order to fully - * hypervisor support. + * PowerNV Platform dependent EEH operations * * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2f4479b94ac3..09f49eed7fb8 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -716,7 +716,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) * to reload MMCR0 (see mmcr0 comment above). */ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { - asm volatile(PPC_INVALIDATE_ERAT); + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT); mtspr(SPRN_MMCR0, mmcr0); } @@ -758,7 +758,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) mtspr(SPRN_PTCR, sprs.ptcr); mtspr(SPRN_RPR, sprs.rpr); mtspr(SPRN_TSCR, sprs.tscr); - mtspr(SPRN_LDBAR, sprs.ldbar); if (pls >= pnv_first_tb_loss_level) { /* TB loss */ @@ -790,6 +789,7 @@ core_woken: mtspr(SPRN_MMCR0, sprs.mmcr0); mtspr(SPRN_MMCR1, sprs.mmcr1); mtspr(SPRN_MMCR2, sprs.mmcr2); + mtspr(SPRN_LDBAR, sprs.ldbar); mtspr(SPRN_SPRG3, local_paca->sprg_vdso); @@ -1155,10 +1155,10 @@ static void __init pnv_power9_idle_init(void) pnv_deepest_stop_psscr_mask); } - pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%lld\n", + pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", pnv_first_spr_loss_level); - pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%lld\n", + pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", pnv_first_tb_loss_level); } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index c321fdbc2200..c16249d251f1 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -19,18 +19,25 @@ #include "pci.h" -/* - * spinlock to protect initialisation of an npu_context for a particular - * mm_struct. - */ -static DEFINE_SPINLOCK(npu_context_lock); - static struct pci_dev *get_pci_dev(struct device_node *dn) { struct pci_dn *pdn = PCI_DN(dn); + struct pci_dev *pdev; - return pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), + pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), pdn->busno, pdn->devfn); + + /* + * pci_get_domain_bus_and_slot() increased the reference count of + * the PCI device, but callers don't need that actually as the PE + * already holds a reference to the device. 
Since callers aren't + * aware of the reference count change, call pci_dev_put() now to + * avoid leaks. + */ + if (pdev) + pci_dev_put(pdev); + + return pdev; } /* Given a NPU device get the associated PCI device. */ @@ -359,15 +366,6 @@ struct npu_comp { /* An NPU descriptor, valid for POWER9 only */ struct npu { int index; - __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS]; - unsigned int mmio_atsd_count; - - /* Bitmask for MMIO register usage */ - unsigned long mmio_atsd_usage; - - /* Do we need to explicitly flush the nest mmu? */ - bool nmmu_flush; - struct npu_comp npucomp; }; @@ -624,534 +622,8 @@ struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) } #endif /* CONFIG_IOMMU_API */ -/* Maximum number of nvlinks per npu */ -#define NV_MAX_LINKS 6 - -/* Maximum index of npu2 hosts in the system. Always < NV_MAX_NPUS */ -static int max_npu2_index; - -struct npu_context { - struct mm_struct *mm; - struct pci_dev *npdev[NV_MAX_NPUS][NV_MAX_LINKS]; - struct mmu_notifier mn; - struct kref kref; - bool nmmu_flush; - - /* Callback to stop translation requests on a given GPU */ - void (*release_cb)(struct npu_context *context, void *priv); - - /* - * Private pointer passed to the above callback for usage by - * device drivers. - */ - void *priv; -}; - -struct mmio_atsd_reg { - struct npu *npu; - int reg; -}; - -/* - * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC - * if none are available. - */ -static int get_mmio_atsd_reg(struct npu *npu) -{ - int i; - - for (i = 0; i < npu->mmio_atsd_count; i++) { - if (!test_bit(i, &npu->mmio_atsd_usage)) - if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) - return i; - } - - return -ENOSPC; -} - -static void put_mmio_atsd_reg(struct npu *npu, int reg) -{ - clear_bit_unlock(reg, &npu->mmio_atsd_usage); -} - -/* MMIO ATSD register offsets */ -#define XTS_ATSD_LAUNCH 0 -#define XTS_ATSD_AVA 1 -#define XTS_ATSD_STAT 2 - -static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize) -{ - unsigned long launch = 0; - - if (psize == MMU_PAGE_COUNT) { - /* IS set to invalidate entire matching PID */ - launch |= PPC_BIT(12); - } else { - /* AP set to invalidate region of psize */ - launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17); - } - - /* PRS set to process-scoped */ - launch |= PPC_BIT(13); - - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); - - /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */ - - return launch; -} - -static void mmio_atsd_regs_write(struct mmio_atsd_reg - mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset, - unsigned long val) -{ - struct npu *npu; - int i, reg; - - for (i = 0; i <= max_npu2_index; i++) { - reg = mmio_atsd_reg[i].reg; - if (reg < 0) - continue; - - npu = mmio_atsd_reg[i].npu; - __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset); - } -} - -static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], - unsigned long pid) -{ - unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT); - - /* Invalidating the entire process doesn't use a va */ - mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch); -} - -static void mmio_invalidate_range(struct mmio_atsd_reg - mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid, - unsigned long start, unsigned long psize) -{ - unsigned long launch = get_atsd_launch_val(pid, psize); - - /* Write all VAs first */ - mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start); - - /* Issue one barrier for all address writes */ - eieio(); - - /* Launch */ - mmio_atsd_regs_write(mmio_atsd_reg, 
XTS_ATSD_LAUNCH, launch); -} - -#define mn_to_npu_context(x) container_of(x, struct npu_context, mn) - -static void mmio_invalidate_wait( - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) -{ - struct npu *npu; - int i, reg; - - /* Wait for all invalidations to complete */ - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; - - /* Wait for completion */ - npu = mmio_atsd_reg[i].npu; - reg = mmio_atsd_reg[i].reg; - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) - cpu_relax(); - } -} - -/* - * Acquires all the address translation shootdown (ATSD) registers required to - * launch an ATSD on all links this npu_context is active on. - */ -static void acquire_atsd_reg(struct npu_context *npu_context, - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) -{ - int i, j; - struct npu *npu; - struct pci_dev *npdev; - - for (i = 0; i <= max_npu2_index; i++) { - mmio_atsd_reg[i].reg = -1; - for (j = 0; j < NV_MAX_LINKS; j++) { - /* - * There are no ordering requirements with respect to - * the setup of struct npu_context, but to ensure - * consistent behaviour we need to ensure npdev[][] is - * only read once. - */ - npdev = READ_ONCE(npu_context->npdev[i][j]); - if (!npdev) - continue; - - npu = pci_bus_to_host(npdev->bus)->npu; - if (!npu) - continue; - - mmio_atsd_reg[i].npu = npu; - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); - while (mmio_atsd_reg[i].reg < 0) { - mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); - cpu_relax(); - } - break; - } - } -} - -/* - * Release previously acquired ATSD registers. To avoid deadlocks the registers - * must be released in the same order they were acquired above in - * acquire_atsd_reg. - */ -static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) -{ - int i; - - for (i = 0; i <= max_npu2_index; i++) { - /* - * We can't rely on npu_context->npdev[][] being the same here - * as when acquire_atsd_reg() was called, hence we use the - * values stored in mmio_atsd_reg during the acquire phase - * rather than re-reading npdev[][]. - */ - if (mmio_atsd_reg[i].reg < 0) - continue; - - put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); - } -} - -/* - * Invalidate a virtual address range - */ -static void mmio_invalidate(struct npu_context *npu_context, - unsigned long start, unsigned long size) -{ - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; - unsigned long pid = npu_context->mm->context.id; - unsigned long atsd_start = 0; - unsigned long end = start + size - 1; - int atsd_psize = MMU_PAGE_COUNT; - - /* - * Convert the input range into one of the supported sizes. If the range - * doesn't fit, use the next larger supported size. Invalidation latency - * is high, so over-invalidation is preferred to issuing multiple - * invalidates. - * - * A 4K page size isn't supported by NPU/GPU ATS, so that case is - * ignored. - */ - if (size == SZ_64K) { - atsd_start = start; - atsd_psize = MMU_PAGE_64K; - } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) { - atsd_start = ALIGN_DOWN(start, SZ_2M); - atsd_psize = MMU_PAGE_2M; - } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) { - atsd_start = ALIGN_DOWN(start, SZ_1G); - atsd_psize = MMU_PAGE_1G; - } - - if (npu_context->nmmu_flush) - /* - * Unfortunately the nest mmu does not support flushing specific - * addresses so we have to flush the whole mm once before - * shooting down the GPU translation. 
- */ - flush_all_mm(npu_context->mm); - - /* - * Loop over all the NPUs this process is active on and launch - * an invalidate. - */ - acquire_atsd_reg(npu_context, mmio_atsd_reg); - - if (atsd_psize == MMU_PAGE_COUNT) - mmio_invalidate_pid(mmio_atsd_reg, pid); - else - mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start, - atsd_psize); - - mmio_invalidate_wait(mmio_atsd_reg); - - /* - * The GPU requires two flush ATSDs to ensure all entries have been - * flushed. We use PID 0 as it will never be used for a process on the - * GPU. - */ - mmio_invalidate_pid(mmio_atsd_reg, 0); - mmio_invalidate_wait(mmio_atsd_reg); - mmio_invalidate_pid(mmio_atsd_reg, 0); - mmio_invalidate_wait(mmio_atsd_reg); - - release_atsd_reg(mmio_atsd_reg); -} - -static void pnv_npu2_mn_release(struct mmu_notifier *mn, - struct mm_struct *mm) -{ - struct npu_context *npu_context = mn_to_npu_context(mn); - - /* Call into device driver to stop requests to the NMMU */ - if (npu_context->release_cb) - npu_context->release_cb(npu_context, npu_context->priv); - - /* - * There should be no more translation requests for this PID, but we - * need to ensure any entries for it are removed from the TLB. - */ - mmio_invalidate(npu_context, 0, ~0UL); -} - -static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, start, end - start); -} - -static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { - .release = pnv_npu2_mn_release, - .invalidate_range = pnv_npu2_mn_invalidate_range, -}; - -/* - * Call into OPAL to setup the nmmu context for the current task in - * the NPU. This must be called to setup the context tables before the - * GPU issues ATRs. pdev should be a pointed to PCIe GPU device. - * - * A release callback should be registered to allow a device driver to - * be notified that it should not launch any new translation requests - * as the final TLB invalidate is about to occur. - * - * Returns an error if there no contexts are currently available or a - * npu_context which should be passed to pnv_npu2_handle_fault(). - * - * mmap_sem must be held in write mode and must not be called from interrupt - * context. - */ -struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, - unsigned long flags, - void (*cb)(struct npu_context *, void *), - void *priv) -{ - int rc; - u32 nvlink_index; - struct device_node *nvlink_dn; - struct mm_struct *mm = current->mm; - struct npu *npu; - struct npu_context *npu_context; - struct pci_controller *hose; - - /* - * At present we don't support GPUs connected to multiple NPUs and I'm - * not sure the hardware does either. - */ - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); - - if (!npdev) - /* No nvlink associated with this GPU device */ - return ERR_PTR(-ENODEV); - - /* We only support DR/PR/HV in pnv_npu2_map_lpar_dev() */ - if (flags & ~(MSR_DR | MSR_PR | MSR_HV)) - return ERR_PTR(-EINVAL); - - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - return ERR_PTR(-ENODEV); - - if (!mm || mm->context.id == 0) { - /* - * Kernel thread contexts are not supported and context id 0 is - * reserved on the GPU. 
- */ - return ERR_PTR(-EINVAL); - } - - hose = pci_bus_to_host(npdev->bus); - npu = hose->npu; - if (!npu) - return ERR_PTR(-ENODEV); - - /* - * We store the npu pci device so we can more easily get at the - * associated npus. - */ - spin_lock(&npu_context_lock); - npu_context = mm->context.npu_context; - if (npu_context) { - if (npu_context->release_cb != cb || - npu_context->priv != priv) { - spin_unlock(&npu_context_lock); - return ERR_PTR(-EINVAL); - } - - WARN_ON(!kref_get_unless_zero(&npu_context->kref)); - } - spin_unlock(&npu_context_lock); - - if (!npu_context) { - /* - * We can set up these fields without holding the - * npu_context_lock as the npu_context hasn't been returned to - * the caller meaning it can't be destroyed. Parallel allocation - * is protected against by mmap_sem. - */ - rc = -ENOMEM; - npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL); - if (npu_context) { - kref_init(&npu_context->kref); - npu_context->mm = mm; - npu_context->mn.ops = &nv_nmmu_notifier_ops; - rc = __mmu_notifier_register(&npu_context->mn, mm); - } - - if (rc) { - kfree(npu_context); - return ERR_PTR(rc); - } - - mm->context.npu_context = npu_context; - } - - npu_context->release_cb = cb; - npu_context->priv = priv; - - /* - * npdev is a pci_dev pointer setup by the PCI code. We assign it to - * npdev[][] to indicate to the mmu notifiers that an invalidation - * should also be sent over this nvlink. The notifiers don't use any - * other fields in npu_context, so we just need to ensure that when they - * deference npu_context->npdev[][] it is either a valid pointer or - * NULL. - */ - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); - - if (!npu->nmmu_flush) { - /* - * If we're not explicitly flushing ourselves we need to mark - * the thread for global flushes - */ - npu_context->nmmu_flush = false; - mm_context_add_copro(mm); - } else - npu_context->nmmu_flush = true; - - return npu_context; -} -EXPORT_SYMBOL(pnv_npu2_init_context); - -static void pnv_npu2_release_context(struct kref *kref) -{ - struct npu_context *npu_context = - container_of(kref, struct npu_context, kref); - - if (!npu_context->nmmu_flush) - mm_context_remove_copro(npu_context->mm); - - npu_context->mm->context.npu_context = NULL; -} - -/* - * Destroy a context on the given GPU. May free the npu_context if it is no - * longer active on any GPUs. Must not be called from interrupt context. - */ -void pnv_npu2_destroy_context(struct npu_context *npu_context, - struct pci_dev *gpdev) -{ - int removed; - struct npu *npu; - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); - struct device_node *nvlink_dn; - u32 nvlink_index; - struct pci_controller *hose; - - if (WARN_ON(!npdev)) - return; - - hose = pci_bus_to_host(npdev->bus); - npu = hose->npu; - if (!npu) - return; - nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0); - if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", - &nvlink_index))) - return; - WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); - spin_lock(&npu_context_lock); - removed = kref_put(&npu_context->kref, pnv_npu2_release_context); - spin_unlock(&npu_context_lock); - - /* - * We need to do this outside of pnv_npu2_release_context so that it is - * outside the spinlock as mmu_notifier_destroy uses SRCU. - */ - if (removed) { - mmu_notifier_unregister(&npu_context->mn, - npu_context->mm); - - kfree(npu_context); - } - -} -EXPORT_SYMBOL(pnv_npu2_destroy_context); - -/* - * Assumes mmap_sem is held for the contexts associated mm. 
- */ -int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, - unsigned long *flags, unsigned long *status, int count) -{ - u64 rc = 0, result = 0; - int i, is_write; - struct page *page[1]; - const char __user *u; - char c; - - /* mmap_sem should be held so the struct_mm must be present */ - struct mm_struct *mm = context->mm; - - WARN_ON(!rwsem_is_locked(&mm->mmap_sem)); - - for (i = 0; i < count; i++) { - is_write = flags[i] & NPU2_WRITE; - rc = get_user_pages_remote(NULL, mm, ea[i], 1, - is_write ? FOLL_WRITE : 0, - page, NULL, NULL); - - if (rc != 1) { - status[i] = rc; - result = -EFAULT; - continue; - } - - /* Make sure partition scoped tree gets a pte */ - u = page_address(page[0]); - if (__get_user(c, u)) - result = -EFAULT; - - status[i] = 0; - put_page(page[0]); - } - - return result; -} -EXPORT_SYMBOL(pnv_npu2_handle_fault); - int pnv_npu2_init(struct pci_controller *hose) { - unsigned int i; - u64 mmio_atsd; static int npu_index; struct npu *npu; int ret; @@ -1160,33 +632,18 @@ int pnv_npu2_init(struct pci_controller *hose) if (!npu) return -ENOMEM; - npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush"); - - for (i = 0; i < ARRAY_SIZE(npu->mmio_atsd_regs) && - !of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", - i, &mmio_atsd); i++) - npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32); - - pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i); - npu->mmio_atsd_count = i; - npu->mmio_atsd_usage = 0; npu_index++; if (WARN_ON(npu_index >= NV_MAX_NPUS)) { ret = -ENOSPC; goto fail_exit; } - max_npu2_index = npu_index; npu->index = npu_index; hose->npu = npu; return 0; fail_exit: - for (i = 0; i < npu->mmio_atsd_count; ++i) - iounmap(npu->mmio_atsd_regs[i]); - kfree(npu); - return ret; } diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index 36c8fa3647a2..29ca523c1c79 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -273,7 +273,6 @@ OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); -OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P); OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP); OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c index 5cae375525d0..3e1f064a18db 100644 --- a/arch/powerpc/platforms/powernv/opal-hmi.c +++ b/arch/powerpc/platforms/powernv/opal-hmi.c @@ -137,6 +137,43 @@ static void print_nx_checkstop_reason(const char *level, xstop_reason[i].description); } +static void print_npu_checkstop_reason(const char *level, + struct OpalHMIEvent *hmi_evt) +{ + uint8_t reason, reason_count, i; + + /* + * We may not have a checkstop reason on some combination of + * hardware and/or skiboot version + */ + if (!hmi_evt->u.xstop_error.xstop_reason) { + printk("%s NPU checkstop on chip %x\n", level, + be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id)); + return; + } + + /* + * NPU2 has 3 FIRs. Reason encoded on a byte as: + * 2 bits for the FIR number + * 6 bits for the bit number + * It may be possible to find several reasons. + * + * We don't display a specific message per FIR bit as there + * are too many and most are meaningless without the workbook + * and/or hw team help anyway. 
+ */ + reason_count = sizeof(hmi_evt->u.xstop_error.xstop_reason) / + sizeof(reason); + for (i = 0; i < reason_count; i++) { + reason = (hmi_evt->u.xstop_error.xstop_reason >> (8 * i)) & 0xFF; + if (reason) + printk("%s NPU checkstop on chip %x: FIR%d bit %d is set\n", + level, + be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id), + reason >> 6, reason & 0x3F); + } +} + static void print_checkstop_reason(const char *level, struct OpalHMIEvent *hmi_evt) { @@ -148,6 +185,9 @@ static void print_checkstop_reason(const char *level, case CHECKSTOP_TYPE_NX: print_nx_checkstop_reason(level, hmi_evt); break; + case CHECKSTOP_TYPE_NPU: + print_npu_checkstop_reason(level, hmi_evt); + break; default: printk("%s Unknown Malfunction Alert of type %d\n", level, type); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 98c5d94b17fb..aba443be7daa 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -202,16 +202,18 @@ static int __init opal_register_exception_handlers(void) glue = 0x7000; /* - * Check if we are running on newer firmware that exports - * OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to patch - * the HMI interrupt and we catch it directly in Linux. + * Only ancient OPAL firmware requires this. + * Specifically, firmware from FW810.00 (released June 2014) + * through FW810.20 (Released October 2014). * - * For older firmware (i.e currently released POWER8 System Firmware - * as of today <= SV810_087), we fallback to old behavior and let OPAL - * patch the HMI vector and handle it inside OPAL firmware. + * Check if we are running on newer (post Oct 2014) firmware that + * exports the OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to + * patch the HMI interrupt and we catch it directly in Linux. * - * For newer firmware (in development/yet to be released) we will - * start catching/handling HMI directly in Linux. + * For older firmware (i.e < FW810.20), we fallback to old behavior and + * let OPAL patch the HMI vector and handle it inside OPAL firmware. + * + * For newer firmware we catch/handle the HMI directly in Linux. */ if (!opal_check_token(OPAL_HANDLE_HMI)) { pr_info("Old firmware detected, OPAL handles HMIs.\n"); @@ -221,6 +223,11 @@ static int __init opal_register_exception_handlers(void) glue += 128; } + /* + * Only applicable to ancient firmware, all modern + * (post March 2015/skiboot 5.0) firmware will just return + * OPAL_UNSUPPORTED. + */ opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue); #endif diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 10cc42b9e541..d8080558d020 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -50,6 +50,8 @@ static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", "NPU_OCAPI" }; +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); + void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...) 
{ @@ -2356,7 +2358,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, return 0; } -void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) +static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) { uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; @@ -2456,6 +2458,14 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) if (!pnv_iommu_bypass_disabled) pnv_pci_ioda2_set_bypass(pe, true); + /* + * Set table base for the case of IOMMU DMA use. Usually this is done + * from dma_dev_setup() which is not called when a device is returned + * from VFIO so do it here. + */ + if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, tbl); + return 0; } @@ -2543,6 +2553,8 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group) pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (pe->pbus) pnv_ioda_setup_bus_dma(pe, pe->pbus); + else if (pe->pdev) + set_iommu_table_base(&pe->pdev->dev, NULL); iommu_tce_table_put(tbl); } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index ff1a33fee8e6..6104418c9ad5 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -34,7 +34,6 @@ #include "powernv.h" #include "pci.h" -static DEFINE_MUTEX(p2p_mutex); static DEFINE_MUTEX(tunnel_mutex); int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) @@ -857,79 +856,6 @@ void pnv_pci_dma_bus_setup(struct pci_bus *bus) } } -int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc) -{ - struct pci_controller *hose; - struct pnv_phb *phb_init, *phb_target; - struct pnv_ioda_pe *pe_init; - int rc; - - if (!opal_check_token(OPAL_PCI_SET_P2P)) - return -ENXIO; - - hose = pci_bus_to_host(initiator->bus); - phb_init = hose->private_data; - - hose = pci_bus_to_host(target->bus); - phb_target = hose->private_data; - - pe_init = pnv_ioda_get_pe(initiator); - if (!pe_init) - return -ENODEV; - - /* - * Configuring the initiator's PHB requires to adjust its - * TVE#1 setting. Since the same device can be an initiator - * several times for different target devices, we need to keep - * a reference count to know when we can restore the default - * bypass setting on its TVE#1 when disabling. Opal is not - * tracking PE states, so we add a reference count on the PE - * in linux. - * - * For the target, the configuration is per PHB, so we keep a - * target reference count on the PHB. 
- */ - mutex_lock(&p2p_mutex); - - if (desc & OPAL_PCI_P2P_ENABLE) { - /* always go to opal to validate the configuration */ - rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, - desc, pe_init->pe_number); - - if (rc != OPAL_SUCCESS) { - rc = -EIO; - goto out; - } - - pe_init->p2p_initiator_count++; - phb_target->p2p_target_count++; - } else { - if (!pe_init->p2p_initiator_count || - !phb_target->p2p_target_count) { - rc = -EINVAL; - goto out; - } - - if (--pe_init->p2p_initiator_count == 0) - pnv_pci_ioda2_set_bypass(pe_init, true); - - if (--phb_target->p2p_target_count == 0) { - rc = opal_pci_set_p2p(phb_init->opal_id, - phb_target->opal_id, desc, - pe_init->pe_number); - if (rc != OPAL_SUCCESS) { - rc = -EIO; - goto out; - } - } - } - rc = 0; -out: - mutex_unlock(&p2p_mutex); - return rc; -} -EXPORT_SYMBOL_GPL(pnv_pci_set_p2p); - struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); @@ -938,54 +864,6 @@ struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) } EXPORT_SYMBOL(pnv_pci_get_phb_node); -int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind) -{ - struct device_node *np; - const __be32 *prop; - struct pnv_ioda_pe *pe; - uint16_t window_id; - int rc; - - if (!radix_enabled()) - return -ENXIO; - - if (!(np = pnv_pci_get_phb_node(dev))) - return -ENXIO; - - prop = of_get_property(np, "ibm,phb-indications", NULL); - of_node_put(np); - - if (!prop || !prop[1]) - return -ENXIO; - - *asnind = (u64)be32_to_cpu(prop[1]); - pe = pnv_ioda_get_pe(dev); - if (!pe) - return -ENODEV; - - /* Increase real window size to accept as_notify messages. */ - window_id = (pe->pe_number << 1 ) + 1; - rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number, - window_id, pe->tce_bypass_base, - (uint64_t)1 << 48); - return opal_error_code(rc); -} -EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel); - -int pnv_pci_disable_tunnel(struct pci_dev *dev) -{ - struct pnv_ioda_pe *pe; - - pe = pnv_ioda_get_pe(dev); - if (!pe) - return -ENODEV; - - /* Restore default real window size. 
*/ - pnv_pci_ioda2_set_bypass(pe, true); - return 0; -} -EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel); - int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable) { __be64 val; @@ -1040,29 +918,6 @@ out: } EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar); -#ifdef CONFIG_PPC64 /* for thread.tidr */ -int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid, - u32 *tid) -{ - struct mm_struct *mm = NULL; - - if (task == NULL) - return -EINVAL; - - mm = get_task_mm(task); - if (mm == NULL) - return -EINVAL; - - *pid = mm->context.id; - mmput(mm); - - *tid = task->thread.tidr; - *lpid = mfspr(SPRN_LPID); - return 0; -} -EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info); -#endif - void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index be26ab3d99e0..469c24463247 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -79,9 +79,6 @@ struct pnv_ioda_pe { struct pnv_ioda_pe *master; struct list_head slaves; - /* PCI peer-to-peer*/ - int p2p_initiator_count; - /* Link in list of PE#s */ struct list_head list; }; @@ -172,8 +169,6 @@ struct pnv_phb { /* PHB and hub diagnostics */ unsigned int diag_data_size; u8 *diag_data; - - int p2p_target_count; }; extern struct pci_ops pnv_pci_ops; @@ -200,7 +195,6 @@ extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type); extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); -extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels); extern int pnv_eeh_post_init(void); diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c index ea5ca0201da8..0c0d27d17976 100644 --- a/arch/powerpc/platforms/powernv/vas-window.c +++ b/arch/powerpc/platforms/powernv/vas-window.c @@ -40,16 +40,6 @@ static void compute_paste_address(struct vas_window *window, u64 *addr, int *len pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr); } -u64 vas_win_paste_addr(struct vas_window *win) -{ - u64 addr; - - compute_paste_address(win, &addr, NULL); - - return addr; -} -EXPORT_SYMBOL(vas_win_paste_addr); - static inline void get_hvwc_mmio_bar(struct vas_window *window, u64 *start, int *len) { @@ -1264,12 +1254,3 @@ int vas_win_close(struct vas_window *window) return 0; } EXPORT_SYMBOL_GPL(vas_win_close); - -/* - * Return a system-wide unique window id for the window @win. - */ -u32 vas_win_id(struct vas_window *win) -{ - return encode_pswid(win->vinst->vas_id, win->winid); -} -EXPORT_SYMBOL_GPL(vas_win_id); diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h index 9cc5251816db..5574aec9ee88 100644 --- a/arch/powerpc/platforms/powernv/vas.h +++ b/arch/powerpc/platforms/powernv/vas.h @@ -444,26 +444,6 @@ static inline u64 read_hvwc_reg(struct vas_window *win, return in_be64(win->hvwc_map+reg); } -/* - * Encode/decode the Partition Send Window ID (PSWID) for a window in - * a way that we can uniquely identify any window in the system. i.e. - * we should be able to locate the 'struct vas_window' given the PSWID. 
- * - * Bits Usage - * 0:7 VAS id (8 bits) - * 8:15 Unused, 0 (3 bits) - * 16:31 Window id (16 bits) - */ -static inline u32 encode_pswid(int vasid, int winid) -{ - u32 pswid = 0; - - pswid |= vasid << (31 - 7); - pswid |= winid; - - return pswid; -} - static inline void decode_pswid(u32 pswid, int *vasid, int *winid) { if (vasid) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 9c6b3d860518..f7b484f55553 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -23,6 +23,7 @@ config PPC_PSERIES select ARCH_RANDOM select PPC_DOORBELL select FORCE_SMP + select SWIOTLB default y config PPC_SPLPAR @@ -80,19 +81,19 @@ config LPARCFG bool "LPAR Configuration Data" depends on PPC_PSERIES help - Provide system capacity information via human readable - <key word>=<value> pairs through a /proc/ppc64/lparcfg interface. + Provide system capacity information via human readable + <key word>=<value> pairs through a /proc/ppc64/lparcfg interface. config PPC_PSERIES_DEBUG depends on PPC_PSERIES && PPC_EARLY_DEBUG bool "Enable extra debug logging in platforms/pseries" - help + default y + help Say Y here if you want the pseries core to produce a bunch of debug messages to the system log. Select this if you are having a problem with the pseries core and want to see more of what is going on. This does not enable debugging in lpar.c, which must be manually done due to its verbosity. - default y config PPC_SMLPAR bool "Support for shared-memory logical partitions" @@ -117,16 +118,16 @@ config CMM balance memory across many LPARs. config HV_PERF_CTRS - bool "Hypervisor supplied PMU events (24x7 & GPCI)" - default y - depends on PERF_EVENTS && PPC_PSERIES - help + bool "Hypervisor supplied PMU events (24x7 & GPCI)" + default y + depends on PERF_EVENTS && PPC_PSERIES + help Enable access to hypervisor supplied counters in perf. Currently, this enables code that uses the hcall GetPerfCounterInfo and 24x7 interfaces to retrieve counters. GPCI exists on Power 6 and later systems. 24x7 is available on Power 8 and later systems. - If unsure, select Y. + If unsure, select Y. 
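The PSWID layout described in the comment removed above packs the VAS instance id into bits 0:7 and the window id into bits 16:31 of a 32-bit word, which is why decode_pswid() can recover both with plain shifts and masks. A self-contained model of that packing, using hypothetical model_* helpers rather than the kernel's own encode_pswid()/decode_pswid():

#include <stdint.h>
#include <stdio.h>

static uint32_t model_encode_pswid(int vasid, int winid)
{
	/* bits 0:7 (IBM numbering) = top byte; bits 16:31 = low half-word */
	return ((uint32_t)vasid << 24) | ((uint32_t)winid & 0xFFFF);
}

static void model_decode_pswid(uint32_t pswid, int *vasid, int *winid)
{
	*vasid = pswid >> 24;
	*winid = pswid & 0xFFFF;
}

int main(void)
{
	int vasid, winid;

	model_decode_pswid(model_encode_pswid(3, 0x42), &vasid, &winid);
	printf("vasid=%d winid=0x%04x\n", vasid, winid); /* vasid=3 winid=0x0042 */
	return 0;
}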
config IBMVIO depends on PPC_PSERIES diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index a43ec843c8e2..ab3d59aeacca 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_PAPR_SCM) += papr_scm.o +obj-$(CONFIG_PPC_SPLPAR) += vphn.o ifdef CONFIG_PPC_PSERIES obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 437a74173db2..16e86ba8aa20 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -58,6 +58,10 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa) name = (char *)ccwa + be32_to_cpu(ccwa->name_offset); prop->name = kstrdup(name, GFP_KERNEL); + if (!prop->name) { + dlpar_free_cc_property(prop); + return NULL; + } prop->length = be32_to_cpu(ccwa->prop_length); value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset); @@ -383,11 +387,11 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog) struct pseries_hp_work *work; struct pseries_hp_errorlog *hp_errlog_copy; - hp_errlog_copy = kmalloc(sizeof(struct pseries_hp_errorlog), - GFP_KERNEL); - memcpy(hp_errlog_copy, hp_errlog, sizeof(struct pseries_hp_errorlog)); + hp_errlog_copy = kmemdup(hp_errlog, sizeof(*hp_errlog), GFP_ATOMIC); + if (!hp_errlog_copy) + return; - work = kmalloc(sizeof(struct pseries_hp_work), GFP_KERNEL); + work = kmalloc(sizeof(struct pseries_hp_work), GFP_ATOMIC); if (work) { INIT_WORK((struct work_struct *)work, pseries_hp_work_fn); work->errlog = hp_errlog_copy; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index ab5de985a787..2b87480f2837 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -27,13 +27,7 @@ struct dtl { }; static DEFINE_PER_CPU(struct dtl, cpu_dtl); -/* - * Dispatch trace log event mask: - * 0x7: 0x1: voluntary virtual processor waits - * 0x2: time-slice preempts - * 0x4: virtual partition memory page faults - */ -static u8 dtl_event_mask = 0x7; +static u8 dtl_event_mask = DTL_LOG_ALL; /* @@ -48,7 +42,6 @@ struct dtl_ring { struct dtl_entry *write_ptr; struct dtl_entry *buf; struct dtl_entry *buf_end; - u8 saved_dtl_mask; }; static DEFINE_PER_CPU(struct dtl_ring, dtl_rings); @@ -98,7 +91,6 @@ static int dtl_start(struct dtl *dtl) dtlr->write_ptr = dtl->buf; /* enable event logging */ - dtlr->saved_dtl_mask = lppaca_of(dtl->cpu).dtl_enable_mask; lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask; dtl_consumer = consume_dtle; @@ -116,7 +108,7 @@ static void dtl_stop(struct dtl *dtl) dtlr->buf = NULL; /* restore dtl_enable_mask */ - lppaca_of(dtl->cpu).dtl_enable_mask = dtlr->saved_dtl_mask; + lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT; if (atomic_dec_and_test(&dtl_count)) dtl_consumer = NULL; @@ -188,11 +180,16 @@ static int dtl_enable(struct dtl *dtl) if (dtl->buf) return -EBUSY; + /* ensure there are no other conflicting dtl users */ + if (!read_trylock(&dtl_access_lock)) + return -EBUSY; + n_entries = dtl_buf_entries; buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu)); if (!buf) { printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", __func__, dtl->cpu); + read_unlock(&dtl_access_lock); return -ENOMEM; } @@ -209,8 +206,11 @@ static int dtl_enable(struct dtl *dtl) } spin_unlock(&dtl->lock); - if 
(rc) + if (rc) { + read_unlock(&dtl_access_lock); kmem_cache_free(dtl_cache, buf); + } + return rc; } @@ -222,6 +222,7 @@ static void dtl_disable(struct dtl *dtl) dtl->buf = NULL; dtl->buf_entries = 0; spin_unlock(&dtl->lock); + read_unlock(&dtl_access_lock); } /* file interface */ diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 2ec43b4639a0..46d0d35b9ca4 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -976,6 +976,9 @@ static int pseries_update_drconf_memory(struct of_reconfig_data *pr) if (!memblock_size) return -EINVAL; + if (!pr->old_prop) + return 0; + p = (__be32 *) pr->old_prop->value; if (!p) return -EINVAL; diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c index 1498c6b989e6..1ac52963e08b 100644 --- a/arch/powerpc/platforms/pseries/hvconsole.c +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -49,7 +49,7 @@ EXPORT_SYMBOL(hvc_get_chars); * @vtermno: The vtermno or unit_address of the adapter from which the data * originated. * @buf: The character buffer that contains the character data to send to - * firmware. + * firmware. Must be at least 16 bytes, even if count is less than 16. * @count: Send this number of characters. */ int hvc_put_chars(uint32_t vtermno, const char *buf, int count) diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 84e8ec4011ba..b91eb0929ed1 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -147,13 +147,13 @@ static const struct dma_map_ops ibmebus_dma_ops = { .unmap_page = ibmebus_unmap_page, }; -static int ibmebus_match_path(struct device *dev, void *data) +static int ibmebus_match_path(struct device *dev, const void *data) { struct device_node *dn = to_platform_device(dev)->dev.of_node; return (of_find_node_by_path(data) == dn); } -static int ibmebus_match_node(struct device *dev, void *data) +static int ibmebus_match_node(struct device *dev, const void *data) { return to_platform_device(dev)->dev.of_node == data; } diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 73620dfb63a1..09bb878c21e0 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -17,6 +17,10 @@ #include <linux/jump_label.h> #include <linux/delay.h> #include <linux/stop_machine.h> +#include <linux/spinlock.h> +#include <linux/cpuhotplug.h> +#include <linux/workqueue.h> +#include <linux/proc_fs.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/page.h> @@ -52,13 +56,591 @@ EXPORT_SYMBOL(plpar_hcall); EXPORT_SYMBOL(plpar_hcall9); EXPORT_SYMBOL(plpar_hcall_norets); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +static u8 dtl_mask = DTL_LOG_PREEMPT; +#else +static u8 dtl_mask; +#endif + +void alloc_dtl_buffers(unsigned long *time_limit) +{ + int cpu; + struct paca_struct *pp; + struct dtl_entry *dtl; + + for_each_possible_cpu(cpu) { + pp = paca_ptrs[cpu]; + if (pp->dispatch_log) + continue; + dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL); + if (!dtl) { + pr_warn("Failed to allocate dispatch trace log for cpu %d\n", + cpu); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + pr_warn("Stolen time statistics will be unreliable\n"); +#endif + break; + } + + pp->dtl_ridx = 0; + pp->dispatch_log = dtl; + pp->dispatch_log_end = dtl + N_DISPATCH_LOG; + pp->dtl_curr = dtl; + + if (time_limit && time_after(jiffies, 
*time_limit)) { + cond_resched(); + *time_limit = jiffies + HZ; + } + } +} + +void register_dtl_buffer(int cpu) +{ + long ret; + struct paca_struct *pp; + struct dtl_entry *dtl; + int hwcpu = get_hard_smp_processor_id(cpu); + + pp = paca_ptrs[cpu]; + dtl = pp->dispatch_log; + if (dtl && dtl_mask) { + pp->dtl_ridx = 0; + pp->dtl_curr = dtl; + lppaca_of(cpu).dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); + ret = register_dtl(hwcpu, __pa(dtl)); + if (ret) + pr_err("WARNING: DTL registration of cpu %d (hw %d) failed with %ld\n", + cpu, hwcpu, ret); + + lppaca_of(cpu).dtl_enable_mask = dtl_mask; + } +} + +#ifdef CONFIG_PPC_SPLPAR +struct dtl_worker { + struct delayed_work work; + int cpu; +}; + +struct vcpu_dispatch_data { + int last_disp_cpu; + + int total_disp; + + int same_cpu_disp; + int same_chip_disp; + int diff_chip_disp; + int far_chip_disp; + + int numa_home_disp; + int numa_remote_disp; + int numa_far_disp; +}; + +/* + * This represents the number of cpus in the hypervisor. Since there is no + * architected way to discover the number of processors in the host, we + * provision for dealing with NR_CPUS. This is currently 2048 by default, and + * is sufficient for our purposes. This will need to be tweaked if + * CONFIG_NR_CPUS is changed. + */ +#define NR_CPUS_H NR_CPUS + +DEFINE_RWLOCK(dtl_access_lock); +static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data); +static DEFINE_PER_CPU(u64, dtl_entry_ridx); +static DEFINE_PER_CPU(struct dtl_worker, dtl_workers); +static enum cpuhp_state dtl_worker_state; +static DEFINE_MUTEX(dtl_enable_mutex); +static int vcpudispatch_stats_on __read_mostly; +static int vcpudispatch_stats_freq = 50; +static __be32 *vcpu_associativity, *pcpu_associativity; + + +static void free_dtl_buffers(unsigned long *time_limit) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + int cpu; + struct paca_struct *pp; + + for_each_possible_cpu(cpu) { + pp = paca_ptrs[cpu]; + if (!pp->dispatch_log) + continue; + kmem_cache_free(dtl_cache, pp->dispatch_log); + pp->dtl_ridx = 0; + pp->dispatch_log = 0; + pp->dispatch_log_end = 0; + pp->dtl_curr = 0; + + if (time_limit && time_after(jiffies, *time_limit)) { + cond_resched(); + *time_limit = jiffies + HZ; + } + } +#endif +} + +static int init_cpu_associativity(void) +{ + vcpu_associativity = kcalloc(num_possible_cpus() / threads_per_core, + VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL); + pcpu_associativity = kcalloc(NR_CPUS_H / threads_per_core, + VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL); + + if (!vcpu_associativity || !pcpu_associativity) { + pr_err("error allocating memory for associativity information\n"); + return -ENOMEM; + } + + return 0; +} + +static void destroy_cpu_associativity(void) +{ + kfree(vcpu_associativity); + kfree(pcpu_associativity); + vcpu_associativity = pcpu_associativity = 0; +} + +static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag) +{ + __be32 *assoc; + int rc = 0; + + assoc = &cpu_assoc[(int)(cpu / threads_per_core) * VPHN_ASSOC_BUFSIZE]; + if (!assoc[0]) { + rc = hcall_vphn(cpu, flag, &assoc[0]); + if (rc) + return NULL; + } + + return assoc; +} + +static __be32 *get_pcpu_associativity(int cpu) +{ + return __get_cpu_associativity(cpu, pcpu_associativity, VPHN_FLAG_PCPU); +} + +static __be32 *get_vcpu_associativity(int cpu) +{ + return __get_cpu_associativity(cpu, vcpu_associativity, VPHN_FLAG_VCPU); +} + +static int cpu_relative_dispatch_distance(int last_disp_cpu, int 
cur_disp_cpu) +{ + __be32 *last_disp_cpu_assoc, *cur_disp_cpu_assoc; + + if (last_disp_cpu >= NR_CPUS_H || cur_disp_cpu >= NR_CPUS_H) + return -EINVAL; + + last_disp_cpu_assoc = get_pcpu_associativity(last_disp_cpu); + cur_disp_cpu_assoc = get_pcpu_associativity(cur_disp_cpu); + + if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc) + return -EIO; + + return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc); +} + +static int cpu_home_node_dispatch_distance(int disp_cpu) +{ + __be32 *disp_cpu_assoc, *vcpu_assoc; + int vcpu_id = smp_processor_id(); + + if (disp_cpu >= NR_CPUS_H) { + pr_debug_ratelimited("vcpu dispatch cpu %d > %d\n", + disp_cpu, NR_CPUS_H); + return -EINVAL; + } + + disp_cpu_assoc = get_pcpu_associativity(disp_cpu); + vcpu_assoc = get_vcpu_associativity(vcpu_id); + + if (!disp_cpu_assoc || !vcpu_assoc) + return -EIO; + + return cpu_distance(disp_cpu_assoc, vcpu_assoc); +} + +static void update_vcpu_disp_stat(int disp_cpu) +{ + struct vcpu_dispatch_data *disp; + int distance; + + disp = this_cpu_ptr(&vcpu_disp_data); + if (disp->last_disp_cpu == -1) { + disp->last_disp_cpu = disp_cpu; + return; + } + + disp->total_disp++; + + if (disp->last_disp_cpu == disp_cpu || + (cpu_first_thread_sibling(disp->last_disp_cpu) == + cpu_first_thread_sibling(disp_cpu))) + disp->same_cpu_disp++; + else { + distance = cpu_relative_dispatch_distance(disp->last_disp_cpu, + disp_cpu); + if (distance < 0) + pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n", + smp_processor_id()); + else { + switch (distance) { + case 0: + disp->same_chip_disp++; + break; + case 1: + disp->diff_chip_disp++; + break; + case 2: + disp->far_chip_disp++; + break; + default: + pr_debug_ratelimited("vcpudispatch_stats: cpu %d (%d -> %d): unexpected relative dispatch distance %d\n", + smp_processor_id(), + disp->last_disp_cpu, + disp_cpu, + distance); + } + } + } + + distance = cpu_home_node_dispatch_distance(disp_cpu); + if (distance < 0) + pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n", + smp_processor_id()); + else { + switch (distance) { + case 0: + disp->numa_home_disp++; + break; + case 1: + disp->numa_remote_disp++; + break; + case 2: + disp->numa_far_disp++; + break; + default: + pr_debug_ratelimited("vcpudispatch_stats: cpu %d on %d: unexpected numa dispatch distance %d\n", + smp_processor_id(), + disp_cpu, + distance); + } + } + + disp->last_disp_cpu = disp_cpu; +} + +static void process_dtl_buffer(struct work_struct *work) +{ + struct dtl_entry dtle; + u64 i = __this_cpu_read(dtl_entry_ridx); + struct dtl_entry *dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + struct dtl_worker *d = container_of(work, struct dtl_worker, work.work); + + if (!local_paca->dispatch_log) + return; + + /* if we have been migrated away, we cancel ourself */ + if (d->cpu != smp_processor_id()) { + pr_debug("vcpudispatch_stats: cpu %d worker migrated -- canceling worker\n", + smp_processor_id()); + return; + } + + if (i == be64_to_cpu(vpa->dtl_idx)) + goto out; + + while (i < be64_to_cpu(vpa->dtl_idx)) { + dtle = *dtl; + barrier(); + if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) { + /* buffer has overflowed */ + pr_debug_ratelimited("vcpudispatch_stats: cpu %d lost %lld DTL samples\n", + d->cpu, + be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG - i); + i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % 
N_DISPATCH_LOG); + continue; + } + update_vcpu_disp_stat(be16_to_cpu(dtle.processor_id)); + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + + __this_cpu_write(dtl_entry_ridx, i); + +out: + schedule_delayed_work_on(d->cpu, to_delayed_work(work), + HZ / vcpudispatch_stats_freq); +} + +static int dtl_worker_online(unsigned int cpu) +{ + struct dtl_worker *d = &per_cpu(dtl_workers, cpu); + + memset(d, 0, sizeof(*d)); + INIT_DELAYED_WORK(&d->work, process_dtl_buffer); + d->cpu = cpu; + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + per_cpu(dtl_entry_ridx, cpu) = 0; + register_dtl_buffer(cpu); +#else + per_cpu(dtl_entry_ridx, cpu) = be64_to_cpu(lppaca_of(cpu).dtl_idx); +#endif + + schedule_delayed_work_on(cpu, &d->work, HZ / vcpudispatch_stats_freq); + return 0; +} + +static int dtl_worker_offline(unsigned int cpu) +{ + struct dtl_worker *d = &per_cpu(dtl_workers, cpu); + + cancel_delayed_work_sync(&d->work); + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + unregister_dtl(get_hard_smp_processor_id(cpu)); +#endif + + return 0; +} + +static void set_global_dtl_mask(u8 mask) +{ + int cpu; + + dtl_mask = mask; + for_each_present_cpu(cpu) + lppaca_of(cpu).dtl_enable_mask = dtl_mask; +} + +static void reset_global_dtl_mask(void) +{ + int cpu; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + dtl_mask = DTL_LOG_PREEMPT; +#else + dtl_mask = 0; +#endif + for_each_present_cpu(cpu) + lppaca_of(cpu).dtl_enable_mask = dtl_mask; +} + +static int dtl_worker_enable(unsigned long *time_limit) +{ + int rc = 0, state; + + if (!write_trylock(&dtl_access_lock)) { + rc = -EBUSY; + goto out; + } + + set_global_dtl_mask(DTL_LOG_ALL); + + /* Setup dtl buffers and register those */ + alloc_dtl_buffers(time_limit); + + state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/dtl:online", + dtl_worker_online, dtl_worker_offline); + if (state < 0) { + pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n"); + free_dtl_buffers(time_limit); + reset_global_dtl_mask(); + write_unlock(&dtl_access_lock); + rc = -EINVAL; + goto out; + } + dtl_worker_state = state; + +out: + return rc; +} + +static void dtl_worker_disable(unsigned long *time_limit) +{ + cpuhp_remove_state(dtl_worker_state); + free_dtl_buffers(time_limit); + reset_global_dtl_mask(); + write_unlock(&dtl_access_lock); +} + +static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p, + size_t count, loff_t *ppos) +{ + unsigned long time_limit = jiffies + HZ; + struct vcpu_dispatch_data *disp; + int rc, cmd, cpu; + char buf[16]; + + if (count > 15) + return -EINVAL; + + if (copy_from_user(buf, p, count)) + return -EFAULT; + + buf[count] = 0; + rc = kstrtoint(buf, 0, &cmd); + if (rc || cmd < 0 || cmd > 1) { + pr_err("vcpudispatch_stats: please use 0 to disable or 1 to enable dispatch statistics\n"); + return rc ? 
rc : -EINVAL; + } + + mutex_lock(&dtl_enable_mutex); + + if ((cmd == 0 && !vcpudispatch_stats_on) || + (cmd == 1 && vcpudispatch_stats_on)) + goto out; + + if (cmd) { + rc = init_cpu_associativity(); + if (rc) + goto out; + + for_each_possible_cpu(cpu) { + disp = per_cpu_ptr(&vcpu_disp_data, cpu); + memset(disp, 0, sizeof(*disp)); + disp->last_disp_cpu = -1; + } + + rc = dtl_worker_enable(&time_limit); + if (rc) { + destroy_cpu_associativity(); + goto out; + } + } else { + dtl_worker_disable(&time_limit); + destroy_cpu_associativity(); + } + + vcpudispatch_stats_on = cmd; + +out: + mutex_unlock(&dtl_enable_mutex); + if (rc) + return rc; + return count; +} + +static int vcpudispatch_stats_display(struct seq_file *p, void *v) +{ + int cpu; + struct vcpu_dispatch_data *disp; + + if (!vcpudispatch_stats_on) { + seq_puts(p, "off\n"); + return 0; + } + + for_each_online_cpu(cpu) { + disp = per_cpu_ptr(&vcpu_disp_data, cpu); + seq_printf(p, "cpu%d", cpu); + seq_put_decimal_ull(p, " ", disp->total_disp); + seq_put_decimal_ull(p, " ", disp->same_cpu_disp); + seq_put_decimal_ull(p, " ", disp->same_chip_disp); + seq_put_decimal_ull(p, " ", disp->diff_chip_disp); + seq_put_decimal_ull(p, " ", disp->far_chip_disp); + seq_put_decimal_ull(p, " ", disp->numa_home_disp); + seq_put_decimal_ull(p, " ", disp->numa_remote_disp); + seq_put_decimal_ull(p, " ", disp->numa_far_disp); + seq_puts(p, "\n"); + } + + return 0; +} + +static int vcpudispatch_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, vcpudispatch_stats_display, NULL); +} + +static const struct file_operations vcpudispatch_stats_proc_ops = { + .open = vcpudispatch_stats_open, + .read = seq_read, + .write = vcpudispatch_stats_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static ssize_t vcpudispatch_stats_freq_write(struct file *file, + const char __user *p, size_t count, loff_t *ppos) +{ + int rc, freq; + char buf[16]; + + if (count > 15) + return -EINVAL; + + if (copy_from_user(buf, p, count)) + return -EFAULT; + + buf[count] = 0; + rc = kstrtoint(buf, 0, &freq); + if (rc || freq < 1 || freq > HZ) { + pr_err("vcpudispatch_stats_freq: please specify a frequency between 1 and %d\n", + HZ); + return rc ? 
rc : -EINVAL; + } + + vcpudispatch_stats_freq = freq; + + return count; +} + +static int vcpudispatch_stats_freq_display(struct seq_file *p, void *v) +{ + seq_printf(p, "%d\n", vcpudispatch_stats_freq); + return 0; +} + +static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file) +{ + return single_open(file, vcpudispatch_stats_freq_display, NULL); +} + +static const struct file_operations vcpudispatch_stats_freq_proc_ops = { + .open = vcpudispatch_stats_freq_open, + .read = seq_read, + .write = vcpudispatch_stats_freq_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init vcpudispatch_stats_procfs_init(void) +{ + if (!lppaca_shared_proc(get_lppaca())) + return 0; + + if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL, + &vcpudispatch_stats_proc_ops)) + pr_err("vcpudispatch_stats: error creating procfs file\n"); + else if (!proc_create("powerpc/vcpudispatch_stats_freq", 0600, NULL, + &vcpudispatch_stats_freq_proc_ops)) + pr_err("vcpudispatch_stats_freq: error creating procfs file\n"); + + return 0; +} + +machine_device_initcall(pseries, vcpudispatch_stats_procfs_init); +#endif /* CONFIG_PPC_SPLPAR */ + void vpa_init(int cpu) { int hwcpu = get_hard_smp_processor_id(cpu); unsigned long addr; long ret; - struct paca_struct *pp; - struct dtl_entry *dtl; /* * The spec says it "may be problematic" if CPU x registers the VPA of @@ -99,22 +681,7 @@ void vpa_init(int cpu) /* * Register dispatch trace log, if one has been allocated. */ - pp = paca_ptrs[cpu]; - dtl = pp->dispatch_log; - if (dtl) { - pp->dtl_ridx = 0; - pp->dtl_curr = dtl; - lppaca_of(cpu).dtl_idx = 0; - - /* hypervisor reads buffer length from this field */ - dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); - ret = register_dtl(hwcpu, __pa(dtl)); - if (ret) - pr_err("WARNING: DTL registration of cpu %d (hw %d) " - "failed with %ld\n", smp_processor_id(), - hwcpu, ret); - lppaca_of(cpu).dtl_enable_mask = 2; - } + register_dtl_buffer(cpu); } #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 0c48c8964783..fe812bebdf5e 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -6,6 +6,7 @@ * Copyright (C) 2010 IBM Corporation */ +#include <linux/cpu.h> #include <linux/kernel.h> #include <linux/kobject.h> #include <linux/smp.h> @@ -19,6 +20,7 @@ #include <asm/machdep.h> #include <asm/rtas.h> #include "pseries.h" +#include "../../kernel/cacheinfo.h" static struct kobject *mobility_kobj; @@ -335,11 +337,28 @@ void post_mobility_fixup(void) if (rc) printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc); + /* + * We don't want CPUs to go online/offline while the device + * tree is being updated. + */ + cpus_read_lock(); + + /* + * It's common for the destination firmware to replace cache + * nodes. Release all of the cacheinfo hierarchy's references + * before updating the device tree. 
+ */ + cacheinfo_teardown(); + rc = pseries_devicetree_update(MIGRATION_SCOPE); if (rc) printk(KERN_ERR "Post-mobility device tree update " "failed: %d\n", rc); + cacheinfo_rebuild(); + + cpus_read_unlock(); + /* Possibly switch to a new RFI flush type */ pseries_setup_rfi_flush(); diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 96c53b23e58f..c8ec670ee924 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -28,6 +28,7 @@ struct papr_scm_priv { uint64_t blocks; uint64_t block_size; int metadata_size; + bool is_volatile; uint64_t bound_addr; @@ -96,42 +97,102 @@ static int drc_pmem_unbind(struct papr_scm_priv *p) } static int papr_scm_meta_get(struct papr_scm_priv *p, - struct nd_cmd_get_config_data_hdr *hdr) + struct nd_cmd_get_config_data_hdr *hdr) { unsigned long data[PLPAR_HCALL_BUFSIZE]; + unsigned long offset, data_offset; + int len, read; int64_t ret; - if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1) + if ((hdr->in_offset + hdr->in_length) >= p->metadata_size) return -EINVAL; - ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, - hdr->in_offset, 1); - - if (ret == H_PARAMETER) /* bad DRC index */ - return -ENODEV; - if (ret) - return -EINVAL; /* other invalid parameter */ - - hdr->out_buf[0] = data[0] & 0xff; - + for (len = hdr->in_length; len; len -= read) { + + data_offset = hdr->in_length - len; + offset = hdr->in_offset + data_offset; + + if (len >= 8) + read = 8; + else if (len >= 4) + read = 4; + else if (len >= 2) + read = 2; + else + read = 1; + + ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index, + offset, read); + + if (ret == H_PARAMETER) /* bad DRC index */ + return -ENODEV; + if (ret) + return -EINVAL; /* other invalid parameter */ + + switch (read) { + case 8: + *(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]); + break; + case 4: + *(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff); + break; + + case 2: + *(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff); + break; + + case 1: + *(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff); + break; + } + } return 0; } static int papr_scm_meta_set(struct papr_scm_priv *p, - struct nd_cmd_set_config_hdr *hdr) + struct nd_cmd_set_config_hdr *hdr) { + unsigned long offset, data_offset; + int len, wrote; + unsigned long data; + __be64 data_be; int64_t ret; - if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1) + if ((hdr->in_offset + hdr->in_length) >= p->metadata_size) return -EINVAL; - ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, - p->drc_index, hdr->in_offset, hdr->in_buf[0], 1); - - if (ret == H_PARAMETER) /* bad DRC index */ - return -ENODEV; - if (ret) - return -EINVAL; /* other invalid parameter */ + for (len = hdr->in_length; len; len -= wrote) { + + data_offset = hdr->in_length - len; + offset = hdr->in_offset + data_offset; + + if (len >= 8) { + data = *(uint64_t *)(hdr->in_buf + data_offset); + data_be = cpu_to_be64(data); + wrote = 8; + } else if (len >= 4) { + data = *(uint32_t *)(hdr->in_buf + data_offset); + data &= 0xffffffff; + data_be = cpu_to_be32(data); + wrote = 4; + } else if (len >= 2) { + data = *(uint16_t *)(hdr->in_buf + data_offset); + data &= 0xffff; + data_be = cpu_to_be16(data); + wrote = 2; + } else { + data_be = *(uint8_t *)(hdr->in_buf + data_offset); + data_be &= 0xff; + wrote = 1; + } + + ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index, + offset, data_be, wrote); + 
if (ret == H_PARAMETER) /* bad DRC index */ + return -ENODEV; + if (ret) + return -EINVAL; /* other invalid parameter */ + } return 0; } @@ -153,7 +214,7 @@ int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, get_size_hdr = buf; get_size_hdr->status = 0; - get_size_hdr->max_xfer = 1; + get_size_hdr->max_xfer = 8; get_size_hdr->config_size = p->metadata_size; *cmd_rc = 0; break; @@ -248,7 +309,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ndr_desc.nd_set = &p->nd_set; set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); - p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); + if (p->is_volatile) + p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc); + else + p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc); if (!p->region) { dev_err(dev, "Error registering region %pR from %pOF\n", ndr_desc.res, p->dn); @@ -293,6 +357,7 @@ static int papr_scm_probe(struct platform_device *pdev) return -ENODEV; } + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; @@ -304,11 +369,19 @@ static int papr_scm_probe(struct platform_device *pdev) p->drc_index = drc_index; p->block_size = block_size; p->blocks = blocks; + p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required"); /* We just need to ensure that set cookies are unique across */ uuid_parse(uuid_str, (uuid_t *) uuid); - p->nd_set.cookie1 = uuid[0]; - p->nd_set.cookie2 = uuid[1]; + /* + * cookie1 and cookie2 are not really little endian + * we store a little endian representation of the + * uuid str so that we can compare this with the label + * area cookie irrespective of the endian config with which + * the kernel is built. + */ + p->nd_set.cookie1 = cpu_to_le64(uuid[0]); + p->nd_set.cookie2 = cpu_to_le64(uuid[1]); /* might be zero */ p->metadata_size = metadata_size; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 8fa012a65a71..f5940cc71c37 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -38,6 +38,7 @@ #include <linux/of.h> #include <linux/of_pci.h> #include <linux/memblock.h> +#include <linux/swiotlb.h> #include <asm/mmu.h> #include <asm/processor.h> @@ -67,6 +68,7 @@ #include <asm/isa-bridge.h> #include <asm/security_features.h> #include <asm/asm-const.h> +#include <asm/swiotlb.h> #include "pseries.h" #include "../../../../drivers/pci/pci.h" @@ -273,46 +275,16 @@ struct kmem_cache *dtl_cache; */ static int alloc_dispatch_logs(void) { - int cpu, ret; - struct paca_struct *pp; - struct dtl_entry *dtl; - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return 0; if (!dtl_cache) return 0; - for_each_possible_cpu(cpu) { - pp = paca_ptrs[cpu]; - dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL); - if (!dtl) { - pr_warn("Failed to allocate dispatch trace log for cpu %d\n", - cpu); - pr_warn("Stolen time statistics will be unreliable\n"); - break; - } - - pp->dtl_ridx = 0; - pp->dispatch_log = dtl; - pp->dispatch_log_end = dtl + N_DISPATCH_LOG; - pp->dtl_curr = dtl; - } + alloc_dtl_buffers(0); /* Register the DTL for the current (boot) cpu */ - dtl = get_paca()->dispatch_log; - get_paca()->dtl_ridx = 0; - get_paca()->dtl_curr = dtl; - get_paca()->lppaca_ptr->dtl_idx = 0; - - /* hypervisor reads buffer length from this field */ - dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); - ret = register_dtl(hard_smp_processor_id(), __pa(dtl)); - if (ret) - pr_err("WARNING: DTL registration of cpu %d (hw %d) failed " - "with %d\n", smp_processor_id(), - 
hard_smp_processor_id(), ret); - get_paca()->lppaca_ptr->dtl_enable_mask = 2; + register_dtl_buffer(smp_processor_id()); return 0; } @@ -793,6 +765,9 @@ static void __init pSeries_setup_arch(void) } ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare; + + if (swiotlb_force == SWIOTLB_FORCE) + ppc_swiotlb_enable = 1; } static void pseries_panic(char *str) diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index ba758f4be328..6601b9d404dc 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -520,7 +520,7 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page, if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) goto out_fail; - ret = iommu_map_page(dev, tbl, page, offset, size, device_to_mask(dev), + ret = iommu_map_page(dev, tbl, page, offset, size, dma_get_mask(dev), direction, attrs); if (unlikely(ret == DMA_MAPPING_ERROR)) goto out_deallocate; @@ -560,7 +560,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, if (vio_cmo_alloc(viodev, alloc_size)) goto out_fail; - ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, device_to_mask(dev), + ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev), direction, attrs); if (unlikely(!ret)) goto out_deallocate; diff --git a/arch/powerpc/mm/book3s64/vphn.c b/arch/powerpc/platforms/pseries/vphn.c index 0ee7734afb50..3f07bf6c670e 100644 --- a/arch/powerpc/mm/book3s64/vphn.c +++ b/arch/powerpc/platforms/pseries/vphn.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <asm/byteorder.h> -#include "vphn.h" +#include <asm/lppaca.h> /* * The associativity domain numbers are returned from the hypervisor as a @@ -22,7 +22,7 @@ * * Convert to the sequence they would appear in the ibm,associativity property. */ -int vphn_unpack_associativity(const long *packed, __be32 *unpacked) +static int vphn_unpack_associativity(const long *packed, __be32 *unpacked) { __be64 be_packed[VPHN_REGISTER_COUNT]; int i, nr_assoc_doms = 0; @@ -71,3 +71,19 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked) return nr_assoc_doms; } + +/* NOTE: This file is included by a selftest and built in userspace. */ +#ifdef __KERNEL__ +#include <asm/hvcall.h> + +long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity) +{ + long rc; + long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; + + rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu); + vphn_unpack_associativity(retbuf, associativity); + + return rc; +} +#endif diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index e0dbec780fe9..d23288c4abf6 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. 
# config PPC4xx_PCI_EXPRESS diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index aaf23283ba0c..9d73dfddf060 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -37,12 +37,10 @@ obj-$(CONFIG_XILINX_PCI) += xilinx_pci.o obj-$(CONFIG_OF_RTC) += of_rtc.o obj-$(CONFIG_CPM) += cpm_common.o -obj-$(CONFIG_CPM1) += cpm1.o obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o cpm_gpio.o obj-$(CONFIG_8xx_GPIO) += cpm_gpio.o obj-$(CONFIG_QUICC_ENGINE) += cpm_common.o obj-$(CONFIG_PPC_DCR) += dcr.o -obj-$(CONFIG_UCODE_PATCH) += micropatch.o obj-$(CONFIG_PPC_MPC512x) += mpc5xxx_clocks.o obj-$(CONFIG_PPC_MPC52xx) += mpc5xxx_clocks.o diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index e5519875cf17..21a1fae0714e 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -144,7 +144,7 @@ static void dart_cache_sync(unsigned int *base, unsigned int count) unsigned int tmp; /* Perform a standard cache flush */ - flush_inval_dcache_range(start, end); + flush_dcache_range(start, end); /* * Perform the sequence described in the CPC925 manual to diff --git a/arch/powerpc/sysdev/micropatch.c b/arch/powerpc/sysdev/micropatch.c deleted file mode 100644 index 33a9042fca80..000000000000 --- a/arch/powerpc/sysdev/micropatch.c +++ /dev/null @@ -1,749 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* Microcode patches for the CPM as supplied by Motorola. - * This is the one for IIC/SPI. There is a newer one that - * also relocates SMC2, but this would require additional changes - * to uart.c, so I am holding off on that for a moment. - */ -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/param.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <asm/irq.h> -#include <asm/page.h> -#include <asm/pgtable.h> -#include <asm/8xx_immap.h> -#include <asm/cpm.h> -#include <asm/cpm1.h> - -/* - * I2C/SPI relocation patch arrays. 
- */ - -#ifdef CONFIG_I2C_SPI_UCODE_PATCH - -static uint patch_2000[] __initdata = { - 0x7FFFEFD9, - 0x3FFD0000, - 0x7FFB49F7, - 0x7FF90000, - 0x5FEFADF7, - 0x5F89ADF7, - 0x5FEFAFF7, - 0x5F89AFF7, - 0x3A9CFBC8, - 0xE7C0EDF0, - 0x77C1E1BB, - 0xF4DC7F1D, - 0xABAD932F, - 0x4E08FDCF, - 0x6E0FAFF8, - 0x7CCF76CF, - 0xFD1FF9CF, - 0xABF88DC6, - 0xAB5679F7, - 0xB0937383, - 0xDFCE79F7, - 0xB091E6BB, - 0xE5BBE74F, - 0xB3FA6F0F, - 0x6FFB76CE, - 0xEE0DF9CF, - 0x2BFBEFEF, - 0xCFEEF9CF, - 0x76CEAD24, - 0x90B2DF9A, - 0x7FDDD0BF, - 0x4BF847FD, - 0x7CCF76CE, - 0xCFEF7E1F, - 0x7F1D7DFD, - 0xF0B6EF71, - 0x7FC177C1, - 0xFBC86079, - 0xE722FBC8, - 0x5FFFDFFF, - 0x5FB2FFFB, - 0xFBC8F3C8, - 0x94A67F01, - 0x7F1D5F39, - 0xAFE85F5E, - 0xFFDFDF96, - 0xCB9FAF7D, - 0x5FC1AFED, - 0x8C1C5FC1, - 0xAFDD5FC3, - 0xDF9A7EFD, - 0xB0B25FB2, - 0xFFFEABAD, - 0x5FB2FFFE, - 0x5FCE600B, - 0xE6BB600B, - 0x5FCEDFC6, - 0x27FBEFDF, - 0x5FC8CFDE, - 0x3A9CE7C0, - 0xEDF0F3C8, - 0x7F0154CD, - 0x7F1D2D3D, - 0x363A7570, - 0x7E0AF1CE, - 0x37EF2E68, - 0x7FEE10EC, - 0xADF8EFDE, - 0xCFEAE52F, - 0x7D0FE12B, - 0xF1CE5F65, - 0x7E0A4DF8, - 0xCFEA5F72, - 0x7D0BEFEE, - 0xCFEA5F74, - 0xE522EFDE, - 0x5F74CFDA, - 0x0B627385, - 0xDF627E0A, - 0x30D8145B, - 0xBFFFF3C8, - 0x5FFFDFFF, - 0xA7F85F5E, - 0xBFFE7F7D, - 0x10D31450, - 0x5F36BFFF, - 0xAF785F5E, - 0xBFFDA7F8, - 0x5F36BFFE, - 0x77FD30C0, - 0x4E08FDCF, - 0xE5FF6E0F, - 0xAFF87E1F, - 0x7E0FFD1F, - 0xF1CF5F1B, - 0xABF80D5E, - 0x5F5EFFEF, - 0x79F730A2, - 0xAFDD5F34, - 0x47F85F34, - 0xAFED7FDD, - 0x50B24978, - 0x47FD7F1D, - 0x7DFD70AD, - 0xEF717EC1, - 0x6BA47F01, - 0x2D267EFD, - 0x30DE5F5E, - 0xFFFD5F5E, - 0xFFEF5F5E, - 0xFFDF0CA0, - 0xAFED0A9E, - 0xAFDD0C3A, - 0x5F3AAFBD, - 0x7FBDB082, - 0x5F8247F8 -}; - -static uint patch_2f00[] __initdata = { - 0x3E303430, - 0x34343737, - 0xABF7BF9B, - 0x994B4FBD, - 0xBD599493, - 0x349FFF37, - 0xFB9B177D, - 0xD9936956, - 0xBBFDD697, - 0xBDD2FD11, - 0x31DB9BB3, - 0x63139637, - 0x93733693, - 0x193137F7, - 0x331737AF, - 0x7BB9B999, - 0xBB197957, - 0x7FDFD3D5, - 0x73B773F7, - 0x37933B99, - 0x1D115316, - 0x99315315, - 0x31694BF4, - 0xFBDBD359, - 0x31497353, - 0x76956D69, - 0x7B9D9693, - 0x13131979, - 0x79376935 -}; -#endif - -/* - * I2C/SPI/SMC1 relocation patch arrays. 
- */ - -#ifdef CONFIG_I2C_SPI_SMC1_UCODE_PATCH - -static uint patch_2000[] __initdata = { - 0x3fff0000, - 0x3ffd0000, - 0x3ffb0000, - 0x3ff90000, - 0x5f13eff8, - 0x5eb5eff8, - 0x5f88adf7, - 0x5fefadf7, - 0x3a9cfbc8, - 0x77cae1bb, - 0xf4de7fad, - 0xabae9330, - 0x4e08fdcf, - 0x6e0faff8, - 0x7ccf76cf, - 0xfdaff9cf, - 0xabf88dc8, - 0xab5879f7, - 0xb0925d8d, - 0xdfd079f7, - 0xb090e6bb, - 0xe5bbe74f, - 0x9e046f0f, - 0x6ffb76ce, - 0xee0cf9cf, - 0x2bfbefef, - 0xcfeef9cf, - 0x76cead23, - 0x90b3df99, - 0x7fddd0c1, - 0x4bf847fd, - 0x7ccf76ce, - 0xcfef77ca, - 0x7eaf7fad, - 0x7dfdf0b7, - 0xef7a7fca, - 0x77cafbc8, - 0x6079e722, - 0xfbc85fff, - 0xdfff5fb3, - 0xfffbfbc8, - 0xf3c894a5, - 0xe7c9edf9, - 0x7f9a7fad, - 0x5f36afe8, - 0x5f5bffdf, - 0xdf95cb9e, - 0xaf7d5fc3, - 0xafed8c1b, - 0x5fc3afdd, - 0x5fc5df99, - 0x7efdb0b3, - 0x5fb3fffe, - 0xabae5fb3, - 0xfffe5fd0, - 0x600be6bb, - 0x600b5fd0, - 0xdfc827fb, - 0xefdf5fca, - 0xcfde3a9c, - 0xe7c9edf9, - 0xf3c87f9e, - 0x54ca7fed, - 0x2d3a3637, - 0x756f7e9a, - 0xf1ce37ef, - 0x2e677fee, - 0x10ebadf8, - 0xefdecfea, - 0xe52f7d9f, - 0xe12bf1ce, - 0x5f647e9a, - 0x4df8cfea, - 0x5f717d9b, - 0xefeecfea, - 0x5f73e522, - 0xefde5f73, - 0xcfda0b61, - 0x5d8fdf61, - 0xe7c9edf9, - 0x7e9a30d5, - 0x1458bfff, - 0xf3c85fff, - 0xdfffa7f8, - 0x5f5bbffe, - 0x7f7d10d0, - 0x144d5f33, - 0xbfffaf78, - 0x5f5bbffd, - 0xa7f85f33, - 0xbffe77fd, - 0x30bd4e08, - 0xfdcfe5ff, - 0x6e0faff8, - 0x7eef7e9f, - 0xfdeff1cf, - 0x5f17abf8, - 0x0d5b5f5b, - 0xffef79f7, - 0x309eafdd, - 0x5f3147f8, - 0x5f31afed, - 0x7fdd50af, - 0x497847fd, - 0x7f9e7fed, - 0x7dfd70a9, - 0xef7e7ece, - 0x6ba07f9e, - 0x2d227efd, - 0x30db5f5b, - 0xfffd5f5b, - 0xffef5f5b, - 0xffdf0c9c, - 0xafed0a9a, - 0xafdd0c37, - 0x5f37afbd, - 0x7fbdb081, - 0x5f8147f8, - 0x3a11e710, - 0xedf0ccdd, - 0xf3186d0a, - 0x7f0e5f06, - 0x7fedbb38, - 0x3afe7468, - 0x7fedf4fc, - 0x8ffbb951, - 0xb85f77fd, - 0xb0df5ddd, - 0xdefe7fed, - 0x90e1e74d, - 0x6f0dcbf7, - 0xe7decfed, - 0xcb74cfed, - 0xcfeddf6d, - 0x91714f74, - 0x5dd2deef, - 0x9e04e7df, - 0xefbb6ffb, - 0xe7ef7f0e, - 0x9e097fed, - 0xebdbeffa, - 0xeb54affb, - 0x7fea90d7, - 0x7e0cf0c3, - 0xbffff318, - 0x5fffdfff, - 0xac59efea, - 0x7fce1ee5, - 0xe2ff5ee1, - 0xaffbe2ff, - 0x5ee3affb, - 0xf9cc7d0f, - 0xaef8770f, - 0x7d0fb0c6, - 0xeffbbfff, - 0xcfef5ede, - 0x7d0fbfff, - 0x5ede4cf8, - 0x7fddd0bf, - 0x49f847fd, - 0x7efdf0bb, - 0x7fedfffd, - 0x7dfdf0b7, - 0xef7e7e1e, - 0x5ede7f0e, - 0x3a11e710, - 0xedf0ccab, - 0xfb18ad2e, - 0x1ea9bbb8, - 0x74283b7e, - 0x73c2e4bb, - 0x2ada4fb8, - 0xdc21e4bb, - 0xb2a1ffbf, - 0x5e2c43f8, - 0xfc87e1bb, - 0xe74ffd91, - 0x6f0f4fe8, - 0xc7ba32e2, - 0xf396efeb, - 0x600b4f78, - 0xe5bb760b, - 0x53acaef8, - 0x4ef88b0e, - 0xcfef9e09, - 0xabf8751f, - 0xefef5bac, - 0x741f4fe8, - 0x751e760d, - 0x7fdbf081, - 0x741cafce, - 0xefcc7fce, - 0x751e70ac, - 0x741ce7bb, - 0x3372cfed, - 0xafdbefeb, - 0xe5bb760b, - 0x53f2aef8, - 0xafe8e7eb, - 0x4bf8771e, - 0x7e247fed, - 0x4fcbe2cc, - 0x7fbc30a9, - 0x7b0f7a0f, - 0x34d577fd, - 0x308b5db7, - 0xde553e5f, - 0xaf78741f, - 0x741f30f0, - 0xcfef5e2c, - 0x741f3eac, - 0xafb8771e, - 0x5e677fed, - 0x0bd3e2cc, - 0x741ccfec, - 0xe5ca53cd, - 0x6fcb4f74, - 0x5dadde4b, - 0x2ab63d38, - 0x4bb3de30, - 0x751f741c, - 0x6c42effa, - 0xefea7fce, - 0x6ffc30be, - 0xefec3fca, - 0x30b3de2e, - 0xadf85d9e, - 0xaf7daefd, - 0x5d9ede2e, - 0x5d9eafdd, - 0x761f10ac, - 0x1da07efd, - 0x30adfffe, - 0x4908fb18, - 0x5fffdfff, - 0xafbb709b, - 0x4ef85e67, - 0xadf814ad, - 0x7a0f70ad, - 0xcfef50ad, - 0x7a0fde30, - 0x5da0afed, - 0x3c12780f, - 0xefef780f, - 0xefef790f, - 0xa7f85e0f, - 0xffef790f, - 
0xefef790f, - 0x14adde2e, - 0x5d9eadfd, - 0x5e2dfffb, - 0xe79addfd, - 0xeff96079, - 0x607ae79a, - 0xddfceff9, - 0x60795dff, - 0x607acfef, - 0xefefefdf, - 0xefbfef7f, - 0xeeffedff, - 0xebffe7ff, - 0xafefafdf, - 0xafbfaf7f, - 0xaeffadff, - 0xabffa7ff, - 0x6fef6fdf, - 0x6fbf6f7f, - 0x6eff6dff, - 0x6bff67ff, - 0x2fef2fdf, - 0x2fbf2f7f, - 0x2eff2dff, - 0x2bff27ff, - 0x4e08fd1f, - 0xe5ff6e0f, - 0xaff87eef, - 0x7e0ffdef, - 0xf11f6079, - 0xabf8f542, - 0x7e0af11c, - 0x37cfae3a, - 0x7fec90be, - 0xadf8efdc, - 0xcfeae52f, - 0x7d0fe12b, - 0xf11c6079, - 0x7e0a4df8, - 0xcfea5dc4, - 0x7d0befec, - 0xcfea5dc6, - 0xe522efdc, - 0x5dc6cfda, - 0x4e08fd1f, - 0x6e0faff8, - 0x7c1f761f, - 0xfdeff91f, - 0x6079abf8, - 0x761cee24, - 0xf91f2bfb, - 0xefefcfec, - 0xf91f6079, - 0x761c27fb, - 0xefdf5da7, - 0xcfdc7fdd, - 0xd09c4bf8, - 0x47fd7c1f, - 0x761ccfcf, - 0x7eef7fed, - 0x7dfdf093, - 0xef7e7f1e, - 0x771efb18, - 0x6079e722, - 0xe6bbe5bb, - 0xae0ae5bb, - 0x600bae85, - 0xe2bbe2bb, - 0xe2bbe2bb, - 0xaf02e2bb, - 0xe2bb2ff9, - 0x6079e2bb -}; - -static uint patch_2f00[] __initdata = { - 0x30303030, - 0x3e3e3434, - 0xabbf9b99, - 0x4b4fbdbd, - 0x59949334, - 0x9fff37fb, - 0x9b177dd9, - 0x936956bb, - 0xfbdd697b, - 0xdd2fd113, - 0x1db9f7bb, - 0x36313963, - 0x79373369, - 0x3193137f, - 0x7331737a, - 0xf7bb9b99, - 0x9bb19795, - 0x77fdfd3d, - 0x573b773f, - 0x737933f7, - 0xb991d115, - 0x31699315, - 0x31531694, - 0xbf4fbdbd, - 0x35931497, - 0x35376956, - 0xbd697b9d, - 0x96931313, - 0x19797937, - 0x6935af78, - 0xb9b3baa3, - 0xb8788683, - 0x368f78f7, - 0x87778733, - 0x3ffffb3b, - 0x8e8f78b8, - 0x1d118e13, - 0xf3ff3f8b, - 0x6bd8e173, - 0xd1366856, - 0x68d1687b, - 0x3daf78b8, - 0x3a3a3f87, - 0x8f81378f, - 0xf876f887, - 0x77fd8778, - 0x737de8d6, - 0xbbf8bfff, - 0xd8df87f7, - 0xfd876f7b, - 0x8bfff8bd, - 0x8683387d, - 0xb873d87b, - 0x3b8fd7f8, - 0xf7338883, - 0xbb8ee1f8, - 0xef837377, - 0x3337b836, - 0x817d11f8, - 0x7378b878, - 0xd3368b7d, - 0xed731b7d, - 0x833731f3, - 0xf22f3f23 -}; - -static uint patch_2e00[] __initdata = { - 0x27eeeeee, - 0xeeeeeeee, - 0xeeeeeeee, - 0xeeeeeeee, - 0xee4bf4fb, - 0xdbd259bb, - 0x1979577f, - 0xdfd2d573, - 0xb773f737, - 0x4b4fbdbd, - 0x25b9b177, - 0xd2d17376, - 0x956bbfdd, - 0x697bdd2f, - 0xff9f79ff, - 0xff9ff22f -}; -#endif - -/* - * USB SOF patch arrays. - */ - -#ifdef CONFIG_USB_SOF_UCODE_PATCH - -static uint patch_2000[] __initdata = { - 0x7fff0000, - 0x7ffd0000, - 0x7ffb0000, - 0x49f7ba5b, - 0xba383ffb, - 0xf9b8b46d, - 0xe5ab4e07, - 0xaf77bffe, - 0x3f7bbf79, - 0xba5bba38, - 0xe7676076, - 0x60750000 -}; - -static uint patch_2f00[] __initdata = { - 0x3030304c, - 0xcab9e441, - 0xa1aaf220 -}; -#endif - -void __init cpm_load_patch(cpm8xx_t *cp) -{ - volatile uint *dp; /* Dual-ported RAM. 
*/ - volatile cpm8xx_t *commproc; -#if defined(CONFIG_I2C_SPI_UCODE_PATCH) || \ - defined(CONFIG_I2C_SPI_SMC1_UCODE_PATCH) - volatile iic_t *iip; - volatile struct spi_pram *spp; -#ifdef CONFIG_I2C_SPI_SMC1_UCODE_PATCH - volatile smc_uart_t *smp; -#endif -#endif - int i; - - commproc = cp; - -#ifdef CONFIG_USB_SOF_UCODE_PATCH - commproc->cp_rccr = 0; - - dp = (uint *)(commproc->cp_dpmem); - for (i=0; i<(sizeof(patch_2000)/4); i++) - *dp++ = patch_2000[i]; - - dp = (uint *)&(commproc->cp_dpmem[0x0f00]); - for (i=0; i<(sizeof(patch_2f00)/4); i++) - *dp++ = patch_2f00[i]; - - commproc->cp_rccr = 0x0009; - - printk("USB SOF microcode patch installed\n"); -#endif /* CONFIG_USB_SOF_UCODE_PATCH */ - -#if defined(CONFIG_I2C_SPI_UCODE_PATCH) || \ - defined(CONFIG_I2C_SPI_SMC1_UCODE_PATCH) - - commproc->cp_rccr = 0; - - dp = (uint *)(commproc->cp_dpmem); - for (i=0; i<(sizeof(patch_2000)/4); i++) - *dp++ = patch_2000[i]; - - dp = (uint *)&(commproc->cp_dpmem[0x0f00]); - for (i=0; i<(sizeof(patch_2f00)/4); i++) - *dp++ = patch_2f00[i]; - - iip = (iic_t *)&commproc->cp_dparam[PROFF_IIC]; -# define RPBASE 0x0500 - iip->iic_rpbase = RPBASE; - - /* Put SPI above the IIC, also 32-byte aligned. - */ - i = (RPBASE + sizeof(iic_t) + 31) & ~31; - spp = (struct spi_pram *)&commproc->cp_dparam[PROFF_SPI]; - spp->rpbase = i; - -# if defined(CONFIG_I2C_SPI_UCODE_PATCH) - commproc->cp_cpmcr1 = 0x802a; - commproc->cp_cpmcr2 = 0x8028; - commproc->cp_cpmcr3 = 0x802e; - commproc->cp_cpmcr4 = 0x802c; - commproc->cp_rccr = 1; - - printk("I2C/SPI microcode patch installed.\n"); -# endif /* CONFIG_I2C_SPI_UCODE_PATCH */ - -# if defined(CONFIG_I2C_SPI_SMC1_UCODE_PATCH) - - dp = (uint *)&(commproc->cp_dpmem[0x0e00]); - for (i=0; i<(sizeof(patch_2e00)/4); i++) - *dp++ = patch_2e00[i]; - - commproc->cp_cpmcr1 = 0x8080; - commproc->cp_cpmcr2 = 0x808a; - commproc->cp_cpmcr3 = 0x8028; - commproc->cp_cpmcr4 = 0x802a; - commproc->cp_rccr = 3; - - smp = (smc_uart_t *)&commproc->cp_dparam[PROFF_SMC1]; - smp->smc_rpbase = 0x1FC0; - - printk("I2C/SPI/SMC1 microcode patch installed.\n"); -# endif /* CONFIG_I2C_SPI_SMC1_UCODE_PATCH) */ - -#endif /* some variation of the I2C/SPI patch was selected */ -} - -/* - * Take this entire routine out, since no one calls it and its - * logic is suspect. 
- */ - -#if 0 -void -verify_patch(volatile immap_t *immr) -{ - volatile uint *dp; - volatile cpm8xx_t *commproc; - int i; - - commproc = (cpm8xx_t *)&immr->im_cpm; - - printk("cp_rccr %x\n", commproc->cp_rccr); - commproc->cp_rccr = 0; - - dp = (uint *)(commproc->cp_dpmem); - for (i=0; i<(sizeof(patch_2000)/4); i++) - if (*dp++ != patch_2000[i]) { - printk("patch_2000 bad at %d\n", i); - dp--; - printk("found 0x%X, wanted 0x%X\n", *dp, patch_2000[i]); - break; - } - - dp = (uint *)&(commproc->cp_dpmem[0x0f00]); - for (i=0; i<(sizeof(patch_2f00)/4); i++) - if (*dp++ != patch_2f00[i]) { - printk("patch_2f00 bad at %d\n", i); - dp--; - printk("found 0x%X, wanted 0x%X\n", *dp, patch_2f00[i]); - break; - } - - commproc->cp_rccr = 0x0009; -} -#endif diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig index 86fee428f5f1..304614c920aa 100644 --- a/arch/powerpc/sysdev/xics/Kconfig +++ b/arch/powerpc/sysdev/xics/Kconfig @@ -1,15 +1,14 @@ # SPDX-License-Identifier: GPL-2.0 config PPC_XICS - def_bool n - select PPC_SMP_MUXED_IPI - select HARDIRQS_SW_RESEND + def_bool n + select PPC_SMP_MUXED_IPI + select HARDIRQS_SW_RESEND config PPC_ICP_NATIVE - def_bool n + def_bool n config PPC_ICP_HV - def_bool n + def_bool n config PPC_ICS_RTAS - def_bool n - + def_bool n diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index cafb5c4df26b..8ef9cf4ebb1c 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -16,6 +16,7 @@ #include <linux/cpumask.h> #include <linux/mm.h> #include <linux/delay.h> +#include <linux/libfdt.h> #include <asm/prom.h> #include <asm/io.h> @@ -659,6 +660,55 @@ static bool xive_get_max_prio(u8 *max_prio) return true; } +static const u8 *get_vec5_feature(unsigned int index) +{ + unsigned long root, chosen; + int size; + const u8 *vec5; + + root = of_get_flat_dt_root(); + chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); + if (chosen == -FDT_ERR_NOTFOUND) + return NULL; + + vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); + if (!vec5) + return NULL; + + if (size <= index) + return NULL; + + return vec5 + index; +} + +static bool xive_spapr_disabled(void) +{ + const u8 *vec5_xive; + + vec5_xive = get_vec5_feature(OV5_INDX(OV5_XIVE_SUPPORT)); + if (vec5_xive) { + u8 val; + + val = *vec5_xive & OV5_FEAT(OV5_XIVE_SUPPORT); + switch (val) { + case OV5_FEAT(OV5_XIVE_EITHER): + case OV5_FEAT(OV5_XIVE_LEGACY): + break; + case OV5_FEAT(OV5_XIVE_EXPLOIT): + /* Hypervisor only supports XIVE */ + if (xive_cmdline_disabled) + pr_warn("WARNING: Ignoring cmdline option xive=off\n"); + return false; + default: + pr_warn("%s: Unknown xive support option: 0x%x\n", + __func__, val); + break; + } + } + + return xive_cmdline_disabled; +} + bool __init xive_spapr_init(void) { struct device_node *np; @@ -671,7 +721,7 @@ bool __init xive_spapr_init(void) const __be32 *reg; int i; - if (xive_cmdline_disabled) + if (xive_spapr_disabled()) return false; pr_devel("%s()\n", __func__); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d0620d762a5a..14e56c25879f 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -465,8 +465,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi) local_irq_save(flags); hard_irq_disable(); - tracing_enabled = tracing_is_on(); - tracing_off(); + if (!fromipi) { + tracing_enabled = tracing_is_on(); + tracing_off(); + } bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { @@ -2448,7 +2450,9 @@ static void 
dump_one_paca(int cpu) DUMP(p, canary, "%#-*lx"); #endif DUMP(p, saved_r1, "%#-*llx"); +#ifdef CONFIG_PPC_BOOK3E DUMP(p, trap_save, "%#-*x"); +#endif DUMP(p, irq_soft_mask, "%#-*x"); DUMP(p, irq_happened, "%#-*x"); #ifdef CONFIG_MMIOWB @@ -3090,7 +3094,7 @@ static void show_pte(unsigned long addr) printf("pgd @ 0x%px\n", pgdir); - if (pgd_huge(*pgdp)) { + if (pgd_is_leaf(*pgdp)) { format_pte(pgdp, pgd_val(*pgdp)); return; } @@ -3103,7 +3107,7 @@ static void show_pte(unsigned long addr) return; } - if (pud_huge(*pudp)) { + if (pud_is_leaf(*pudp)) { format_pte(pudp, pud_val(*pudp)); return; } @@ -3117,7 +3121,7 @@ static void show_pte(unsigned long addr) return; } - if (pmd_huge(*pmdp)) { + if (pmd_is_leaf(*pmdp)) { format_pte(pmdp, pmd_val(*pmdp)); return; } diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 0c4b12205632..13a1c0d04e9e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only # # For a description of the syntax of this configuration file, -# see Documentation/kbuild/kconfig-language.txt. +# see Documentation/kbuild/kconfig-language.rst. # config 64BIT @@ -17,6 +17,7 @@ config RISCV select OF select OF_EARLY_FLATTREE select OF_IRQ + select ARCH_HAS_BINFMT_FLAT select ARCH_WANT_FRAME_POINTERS select CLONE_BACKWARDS select COMMON_CLK @@ -50,6 +51,7 @@ config RISCV select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MMIOWB select HAVE_EBPF_JIT if 64BIT + select EDAC_SUPPORT config MMU def_bool y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 6b0741c9f348..f8b3b07e4247 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -16,8 +16,6 @@ endif KBUILD_AFLAGS_MODULE += -fPIC KBUILD_CFLAGS_MODULE += -fPIC -KBUILD_DEFCONFIG = defconfig - export BITS ifeq ($(CONFIG_ARCH_RV64I),y) BITS := 64 diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi index 3c06ee4b2b29..40983491b95f 100644 --- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -163,6 +163,7 @@ interrupt-parent = <&plic0>; interrupts = <4>; clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; }; uart1: serial@10011000 { compatible = "sifive,fu540-c000-uart", "sifive,uart0"; @@ -170,6 +171,7 @@ interrupt-parent = <&plic0>; interrupts = <5>; clocks = <&prci PRCI_CLK_TLCLK>; + status = "disabled"; }; i2c0: i2c@10030000 { compatible = "sifive,fu540-c000-i2c", "sifive,i2c0"; @@ -181,6 +183,7 @@ reg-io-width = <1>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi0: spi@10040000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -191,6 +194,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi1: spi@10041000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -201,6 +205,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; qspi2: spi@10050000 { compatible = "sifive,fu540-c000-spi", "sifive,spi0"; @@ -210,6 +215,7 @@ clocks = <&prci PRCI_CLK_TLCLK>; #address-cells = <1>; #size-cells = <0>; + status = "disabled"; }; }; }; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts index 4da88707e28f..0b55c53c08c7 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -42,7 +42,20 @@ }; }; +&uart0 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&i2c0 { + status = "okay"; +}; + &qspi0 
{ + status = "okay"; flash@0 { compatible = "issi,is25wp256", "jedec,spi-nor"; reg = <0>; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 4f02967e55de..04944fb4fa7a 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -69,6 +69,7 @@ CONFIG_VIRTIO_MMIO=y CONFIG_CLK_SIFIVE=y CONFIG_CLK_SIFIVE_FU540_PRCI=y CONFIG_SIFIVE_PLIC=y +CONFIG_SPI_SIFIVE=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -84,4 +85,8 @@ CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y +CONFIG_SPI=y +CONFIG_MMC_SPI=y +CONFIG_MMC=y +CONFIG_DEVTMPFS_MOUNT=y # CONFIG_RCU_TRACE is not set diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 5ee646619cc3..1efaeddf1e4b 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += compat.h generic-y += device.h generic-y += div64.h generic-y += extable.h +generic-y += flat.h generic-y += dma.h generic-y += dma-contiguous.h generic-y += dma-mapping.h diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index 9038aeb900a6..96f95c9ebd97 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -38,11 +38,11 @@ static __always_inline void atomic_set(atomic_t *v, int i) #ifndef CONFIG_GENERIC_ATOMIC64 #define ATOMIC64_INIT(i) { (i) } -static __always_inline long atomic64_read(const atomic64_t *v) +static __always_inline s64 atomic64_read(const atomic64_t *v) { return READ_ONCE(v->counter); } -static __always_inline void atomic64_set(atomic64_t *v, long i) +static __always_inline void atomic64_set(atomic64_t *v, s64 i) { WRITE_ONCE(v->counter, i); } @@ -66,11 +66,11 @@ void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_OP (op, asm_op, I, w, int, ) + ATOMIC_OP (op, asm_op, I, w, int, ) #else #define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_OP (op, asm_op, I, w, int, ) \ - ATOMIC_OP (op, asm_op, I, d, long, 64) + ATOMIC_OP (op, asm_op, I, w, int, ) \ + ATOMIC_OP (op, asm_op, I, d, s64, 64) #endif ATOMIC_OPS(add, add, i) @@ -127,14 +127,14 @@ c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v) \ #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) + ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \ - ATOMIC_FETCH_OP( op, asm_op, I, d, long, 64) \ - ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, long, 64) + ATOMIC_FETCH_OP( op, asm_op, I, w, int, ) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, w, int, ) \ + ATOMIC_FETCH_OP( op, asm_op, I, d, s64, 64) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I, d, s64, 64) #endif ATOMIC_OPS(add, add, +, i) @@ -166,11 +166,11 @@ ATOMIC_OPS(sub, add, +, -i) #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) + ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) #else #define ATOMIC_OPS(op, asm_op, I) \ - ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \ - ATOMIC_FETCH_OP(op, asm_op, I, d, long, 64) + ATOMIC_FETCH_OP(op, asm_op, I, w, int, ) \ + ATOMIC_FETCH_OP(op, asm_op, I, d, s64, 64) #endif ATOMIC_OPS(and, and, i) @@ -219,9 +219,10 @@ static __always_inline int 
atomic_fetch_add_unless(atomic_t *v, int a, int u) #define atomic_fetch_add_unless atomic_fetch_add_unless #ifndef CONFIG_GENERIC_ATOMIC64 -static __always_inline long atomic64_fetch_add_unless(atomic64_t *v, long a, long u) +static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { - long prev, rc; + s64 prev; + long rc; __asm__ __volatile__ ( "0: lr.d %[p], %[c]\n" @@ -290,11 +291,11 @@ c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n) \ #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS() \ - ATOMIC_OP( int, , 4) + ATOMIC_OP(int, , 4) #else #define ATOMIC_OPS() \ - ATOMIC_OP( int, , 4) \ - ATOMIC_OP(long, 64, 8) + ATOMIC_OP(int, , 4) \ + ATOMIC_OP(s64, 64, 8) #endif ATOMIC_OPS() @@ -332,9 +333,10 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset) #define atomic_dec_if_positive(v) atomic_sub_if_positive(v, 1) #ifndef CONFIG_GENERIC_ATOMIC64 -static __always_inline long atomic64_sub_if_positive(atomic64_t *v, int offset) +static __always_inline s64 atomic64_sub_if_positive(atomic64_t *v, s64 offset) { - long prev, rc; + s64 prev; + long rc; __asm__ __volatile__ ( "0: lr.d %[p], %[c]\n" diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index f653bfc8a83b..07ceee8b1747 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -86,7 +86,7 @@ struct task_struct; extern void die(struct pt_regs *regs, const char *str); extern void do_trap(struct pt_regs *regs, int signo, int code, - unsigned long addr, struct task_struct *tsk); + unsigned long addr); #endif /* !__ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index eb8b0195f27f..56a67d66f72f 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -10,6 +10,8 @@ #include <linux/mm.h> #include <asm/tlb.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { @@ -74,33 +76,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #endif /* __PAGETABLE_PMD_FOLDED */ -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page( - GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO); -} - -static inline struct page *pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO); - if (likely(pte != NULL)) - pgtable_page_ctor(pte); - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - #define __pte_free_tlb(tlb, pte, buf) \ do { \ pgtable_page_dtor(pte); \ diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 1fe1b02e44d0..b14d7647d800 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -126,7 +126,7 @@ badframe: task->comm, task_pid_nr(task), __func__, frame, (void *)regs->sepc, (void *)regs->sp); } - force_sig(SIGSEGV, task); + force_sig(SIGSEGV); return 0; } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 6b32190ba73c..424eb72d56b1 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -55,9 +55,10 @@ void die(struct pt_regs *regs, const char *str) do_exit(SIGSEGV); } -void do_trap(struct pt_regs *regs, int signo, int code, - unsigned long addr, struct task_struct *tsk) +void 
do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) { + struct task_struct *tsk = current; + if (show_unhandled_signals && unhandled_signal(tsk, signo) && printk_ratelimit()) { pr_info("%s[%d]: unhandled signal %d code 0x%x at 0x" REG_FMT, @@ -67,14 +68,14 @@ void do_trap(struct pt_regs *regs, int signo, int code, show_regs(regs); } - force_sig_fault(signo, code, (void __user *)addr, tsk); + force_sig_fault(signo, code, (void __user *)addr); } static void do_trap_error(struct pt_regs *regs, int signo, int code, unsigned long addr, const char *str) { if (user_mode(regs)) { - do_trap(regs, signo, code, addr, current); + do_trap(regs, signo, code, addr); } else { if (!fixup_exception(regs)) die(regs, str); @@ -140,7 +141,7 @@ asmlinkage void do_trap_break(struct pt_regs *regs) } #endif /* CONFIG_GENERIC_BUG */ - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc), current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc)); } #ifdef CONFIG_GENERIC_BUG diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 3e2708c626a8..96add1427a75 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -169,7 +169,7 @@ bad_area: up_read(&mm->mmap_sem); /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { - do_trap(regs, SIGSEGV, code, addr, tsk); + do_trap(regs, SIGSEGV, code, addr); return; } @@ -205,7 +205,7 @@ do_sigbus: /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) goto no_context; - do_trap(regs, SIGBUS, BUS_ADRERR, addr, tsk); + do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; vmalloc_fault: @@ -219,7 +219,7 @@ vmalloc_fault: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) - return do_trap(regs, SIGSEGV, code, addr, tsk); + return do_trap(regs, SIGSEGV, code, addr); /* * Synchronize this task's top level page-table @@ -272,9 +272,6 @@ vmalloc_fault: * entries, but in RISC-V, SFENCE.VMA specifies an * ordering constraint, not a cache flush; it is * necessary even after writing invalid entries. - * Relying on flush_tlb_fix_spurious_fault would - * suffice, but the extra traps reduce - * performance. So, eagerly SFENCE.VMA. */ local_flush_tlb_page(addr); diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index 426d5c33ea90..5451ef3845f2 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -731,6 +731,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || BPF_CLASS(insn->code) == BPF_JMP; + struct bpf_prog_aux *aux = ctx->prog->aux; int rvoff, i = insn - ctx->prog->insnsi; u8 rd = -1, rs = -1, code = insn->code; s16 off = insn->off; @@ -742,8 +743,13 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext */ + emit_zext_32(rd, ctx); + break; + } emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -751,49 +757,49 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU | BPF_ADD | BPF_X: case BPF_ALU64 | BPF_ADD | BPF_X: emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU64 | BPF_SUB | BPF_X: emit(is64 ? 
rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU64 | BPF_AND | BPF_X: emit(rv_and(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: emit(rv_or(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: emit(rv_xor(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_X: emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_LSH | BPF_X: @@ -805,13 +811,13 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_RSH | BPF_X: emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X: emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -820,7 +826,7 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU64 | BPF_NEG: emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) : rv_subw(rd, RV_REG_ZERO, rd), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -885,7 +891,7 @@ out_be: case BPF_ALU | BPF_MOV | BPF_K: case BPF_ALU64 | BPF_MOV | BPF_K: emit_imm(rd, imm, ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -900,7 +906,7 @@ out_be: emit(is64 ? rv_add(rd, rd, RV_REG_T1) : rv_addw(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_K: @@ -913,7 +919,7 @@ out_be: emit(is64 ? rv_sub(rd, rd, RV_REG_T1) : rv_subw(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_AND | BPF_K: @@ -924,7 +930,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(rv_and(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_OR | BPF_K: @@ -935,7 +941,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(rv_or(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_K: @@ -946,7 +952,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(rv_xor(rd, rd, RV_REG_T1), ctx); } - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MUL | BPF_K: @@ -954,7 +960,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(is64 ? 
rv_mul(rd, rd, RV_REG_T1) : rv_mulw(rd, rd, RV_REG_T1), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_DIV | BPF_K: @@ -962,7 +968,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : rv_divuw(rd, rd, RV_REG_T1), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_K: @@ -970,7 +976,7 @@ out_be: emit_imm(RV_REG_T1, imm, ctx); emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : rv_remuw(rd, rd, RV_REG_T1), ctx); - if (!is64) + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_LSH | BPF_K: @@ -1263,6 +1269,8 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); emit(rv_lbu(rd, 0, RV_REG_T1), ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_LDX | BPF_MEM | BPF_H: if (is_12b_int(off)) { @@ -1273,6 +1281,8 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); emit(rv_lhu(rd, 0, RV_REG_T1), ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_LDX | BPF_MEM | BPF_W: if (is_12b_int(off)) { @@ -1283,6 +1293,8 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx); emit(rv_lwu(rd, 0, RV_REG_T1), ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_LDX | BPF_MEM | BPF_DW: if (is_12b_int(off)) { @@ -1527,6 +1539,11 @@ static void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { bool tmp_blinded = false, extra_pass = false; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 109243fdb6ec..5d8570ed6cab 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +config ARCH_HAS_MEM_ENCRYPT + def_bool y + config MMU def_bool y @@ -30,7 +33,7 @@ config GENERIC_BUG_RELATIVE_POINTERS def_bool y config GENERIC_LOCKBREAK - def_bool y if SMP && PREEMPT + def_bool y if PREEMPT config PGSTE def_bool y if KVM @@ -113,7 +116,6 @@ config S390 select DYNAMIC_FTRACE if FUNCTION_TRACER select GENERIC_CLOCKEVENTS select GENERIC_CPU_AUTOPROBE - select GENERIC_CPU_DEVICES if !SMP select GENERIC_CPU_VULNERABILITIES select GENERIC_FIND_FIRST_BIT select GENERIC_SMP_IDLE_THREAD @@ -137,6 +139,7 @@ config S390 select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_FAST_GUP select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FENTRY select HAVE_FTRACE_MCOUNT_RECORD @@ -144,7 +147,6 @@ config S390 select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS - select HAVE_GENERIC_GUP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -187,6 +189,8 @@ config S390 select VIRT_CPU_ACCOUNTING select ARCH_HAS_SCALED_CPUTIME select HAVE_NMI + select SWIOTLB + select GENERIC_ALLOCATOR config SCHED_OMIT_FRAME_POINTER @@ -399,27 +403,10 @@ config SYSVIPC_COMPAT config SMP def_bool y - prompt "Symmetric multi-processing support" - ---help--- - This enables support for systems with more than one CPU. If you have - a system with only one CPU, like most personal computers, say N. If - you have a system with more than one CPU, say Y. - - If you say N here, the kernel will run on uni- and multiprocessor - machines, but will use only one CPU of a multiprocessor machine. 
If - you say Y here, the kernel will run on many, but not all, - uniprocessor machines. On a uniprocessor machine, the kernel - will run faster if you say N here. - - See also the SMP-HOWTO available at - <http://www.tldp.org/docs.html#howto>. - - Even if you don't know what to do here, say Y. config NR_CPUS int "Maximum number of CPUs (2-512)" range 2 512 - depends on SMP default "64" help This allows you to specify the maximum number of CPUs which this @@ -431,12 +418,6 @@ config NR_CPUS config HOTPLUG_CPU def_bool y - prompt "Support for hot-pluggable CPUs" - depends on SMP - help - Say Y here to be able to turn CPUs off and on. CPUs - can be controlled through /sys/devices/system/cpu/cpu#. - Say N if you want to disable CPU hotplug. # Some NUMA nodes have memory ranges that span # other nodes. Even though a pfn is valid and @@ -448,7 +429,7 @@ config NODES_SPAN_OTHER_NODES config NUMA bool "NUMA support" - depends on SMP && SCHED_TOPOLOGY + depends on SCHED_TOPOLOGY default n help Enable NUMA support @@ -523,7 +504,6 @@ config SCHED_DRAWER config SCHED_TOPOLOGY def_bool y prompt "Topology scheduler support" - depends on SMP select SCHED_SMT select SCHED_MC select SCHED_BOOK @@ -661,9 +641,6 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool y -config ARCH_SELECT_MEMORY_MODEL - def_bool y - config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y if SPARSEMEM @@ -763,7 +740,7 @@ config PCI_NR_FUNCTIONS This allows you to specify the maximum number of PCI functions which this kernel will support. -endif # PCI +endif # PCI config HAS_IOMEM def_bool PCI @@ -829,16 +806,15 @@ menu "Dump support" config CRASH_DUMP bool "kernel crash dumps" - depends on SMP select KEXEC help Generate crash dump after being started by kexec. Crash dump kernels are loaded in the main kernel with kexec-tools into a specially reserved region and then later executed after a crash by kdump/kexec. - Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this. + Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this. This option also enables s390 zfcpdump. 
- See also <file:Documentation/s390/zfcpdump.txt> + See also <file:Documentation/s390/zfcpdump.rst> endmenu diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e48013cf50a2..e0bab7ed4123 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -10,8 +10,6 @@ # Copyright (C) 1994 by Linus Torvalds # -KBUILD_DEFCONFIG := defconfig - LD_BFD := elf64-s390 KBUILD_LDFLAGS := -m elf64_s390 KBUILD_AFLAGS_MODULE += -fPIC diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index b0920b35f87b..e26d4413d34c 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -88,6 +88,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_VFIO_AP=m +CONFIG_VFIO_CCW=m CONFIG_CRASH_DUMP=y CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y @@ -498,6 +499,7 @@ CONFIG_VIRTIO_PCI=m CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_S390_AP_IOMMU=y +CONFIG_S390_CCW_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y @@ -586,6 +588,7 @@ CONFIG_GDB_SCRIPTS=y CONFIG_FRAME_WARN=1024 CONFIG_READABLE_ASM=y CONFIG_UNUSED_SYMBOLS=y +CONFIG_HEADERS_INSTALL=y CONFIG_HEADERS_CHECK=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index c59b922cb6c5..e4bc40073003 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -1,21 +1,22 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_USELIB=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -# CONFIG_CPU_ISOLATION is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -CONFIG_CGROUPS=y +CONFIG_NUMA_BALANCING=y +# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_BLK_CGROUP=y -CONFIG_CGROUP_SCHED=y +CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_FREEZER=y @@ -26,98 +27,402 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y -CONFIG_CHECKPOINT_RESTORE=y +CONFIG_SCHED_AUTOGROUP=y CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set +CONFIG_CHECKPOINT_RESTORE=y CONFIG_BPF_SYSCALL=y CONFIG_USERFAULTFD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y -CONFIG_LIVEPATCH=y -CONFIG_NR_CPUS=256 -CONFIG_NUMA=y -CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_VERIFY_SIG=y -CONFIG_CRASH_DUMP=y -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y -CONFIG_CMM=m -CONFIG_OPROFILE=y +CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y -CONFIG_STATIC_KEYS_SELFTEST=y CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SIG_SHA256=y CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BLK_WBT=y +CONFIG_BLK_WBT_SQ=y CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y -CONFIG_BINFMT_MISC=m +CONFIG_LIVEPATCH=y +CONFIG_TUNE_ZEC12=y +CONFIG_NR_CPUS=512 +CONFIG_NUMA=y +CONFIG_HZ_100=y +CONFIG_KEXEC_FILE=y +CONFIG_KEXEC_VERIFY_SIG=y +CONFIG_EXPOLINE=y +CONFIG_EXPOLINE_AUTO=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y 
+CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y CONFIG_ZBUD=m CONFIG_ZSMALLOC=m CONFIG_ZSMALLOC_STAT=y +CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_PCI=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_S390=y +CONFIG_CHSC_SCH=y +CONFIG_VFIO_AP=m +CONFIG_VFIO_CCW=m +CONFIG_CRASH_DUMP=y +CONFIG_BINFMT_MISC=m +CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y CONFIG_NET=y CONFIG_PACKET=y +CONFIG_PACKET_DIAG=m CONFIG_UNIX=y -CONFIG_NET_KEY=y +CONFIG_UNIX_DIAG=m +CONFIG_XFRM_USER=m +CONFIG_NET_KEY=m +CONFIG_SMC=m +CONFIG_SMC_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_SYN_COOKIES=y +CONFIG_NET_IPVTI=m +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +CONFIG_INET_XFRM_MODE_TRANSPORT=m +CONFIG_INET_XFRM_MODE_TUNNEL=m +CONFIG_INET_XFRM_MODE_BEET=m +CONFIG_INET_DIAG=m +CONFIG_INET_UDP_DIAG=m +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_HSTCP=m +CONFIG_TCP_CONG_HYBLA=m +CONFIG_TCP_CONG_SCALABLE=m +CONFIG_TCP_CONG_LP=m +CONFIG_TCP_CONG_VENO=m +CONFIG_TCP_CONG_YEAH=m +CONFIG_TCP_CONG_ILLINOIS=m +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_MIP6=m +CONFIG_INET6_XFRM_MODE_TRANSPORT=m +CONFIG_INET6_XFRM_MODE_TUNNEL=m +CONFIG_INET6_XFRM_MODE_BEET=m +CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m +CONFIG_IPV6_SIT=m +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_SUBTREES=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_SECMARK=y +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_TIMEOUT=y +CONFIG_NF_CONNTRACK_TIMESTAMP=y +CONFIG_NF_CONNTRACK_AMANDA=m +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_H323=m +CONFIG_NF_CONNTRACK_IRC=m +CONFIG_NF_CONNTRACK_NETBIOS_NS=m +CONFIG_NF_CONNTRACK_SNMP=m +CONFIG_NF_CONNTRACK_PPTP=m +CONFIG_NF_CONNTRACK_SANE=m +CONFIG_NF_CONNTRACK_SIP=m +CONFIG_NF_CONNTRACK_TFTP=m +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_NETLINK_TIMEOUT=m +CONFIG_NF_TABLES=m +CONFIG_NFT_CT=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m +CONFIG_NFT_HASH=m +CONFIG_NETFILTER_XT_SET=m +CONFIG_NETFILTER_XT_TARGET_AUDIT=m +CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m +CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m +CONFIG_NETFILTER_XT_TARGET_CONNMARK=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m +CONFIG_NETFILTER_XT_TARGET_CT=m +CONFIG_NETFILTER_XT_TARGET_DSCP=m +CONFIG_NETFILTER_XT_TARGET_HMARK=m +CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m +CONFIG_NETFILTER_XT_TARGET_LOG=m +CONFIG_NETFILTER_XT_TARGET_MARK=m +CONFIG_NETFILTER_XT_TARGET_NFLOG=m +CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m +CONFIG_NETFILTER_XT_TARGET_TEE=m +CONFIG_NETFILTER_XT_TARGET_TPROXY=m +CONFIG_NETFILTER_XT_TARGET_TRACE=m +CONFIG_NETFILTER_XT_TARGET_SECMARK=m +CONFIG_NETFILTER_XT_TARGET_TCPMSS=m +CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m +CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_CLUSTER=m +CONFIG_NETFILTER_XT_MATCH_COMMENT=m +CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m +CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m +CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m +CONFIG_NETFILTER_XT_MATCH_CONNMARK=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_CPU=m +CONFIG_NETFILTER_XT_MATCH_DCCP=m +CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m 
+CONFIG_NETFILTER_XT_MATCH_DSCP=m +CONFIG_NETFILTER_XT_MATCH_ESP=m +CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m +CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPRANGE=m +CONFIG_NETFILTER_XT_MATCH_IPVS=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_LIMIT=m +CONFIG_NETFILTER_XT_MATCH_MAC=m +CONFIG_NETFILTER_XT_MATCH_MARK=m +CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m +CONFIG_NETFILTER_XT_MATCH_NFACCT=m +CONFIG_NETFILTER_XT_MATCH_OSF=m +CONFIG_NETFILTER_XT_MATCH_OWNER=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m +CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m +CONFIG_NETFILTER_XT_MATCH_QUOTA=m +CONFIG_NETFILTER_XT_MATCH_RATEEST=m +CONFIG_NETFILTER_XT_MATCH_REALM=m +CONFIG_NETFILTER_XT_MATCH_RECENT=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_MATCH_TCPMSS=m +CONFIG_NETFILTER_XT_MATCH_TIME=m +CONFIG_NETFILTER_XT_MATCH_U32=m +CONFIG_IP_SET=m +CONFIG_IP_SET_BITMAP_IP=m +CONFIG_IP_SET_BITMAP_IPMAC=m +CONFIG_IP_SET_BITMAP_PORT=m +CONFIG_IP_SET_HASH_IP=m +CONFIG_IP_SET_HASH_IPPORT=m +CONFIG_IP_SET_HASH_IPPORTIP=m +CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m +CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m +CONFIG_IP_SET_HASH_NETPORT=m +CONFIG_IP_SET_HASH_NETIFACE=m +CONFIG_IP_SET_LIST_SET=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m +CONFIG_IP_VS_FTP=m +CONFIG_IP_VS_PE_SIP=m +CONFIG_NF_CONNTRACK_IPV4=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NF_TABLES_ARP=y +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_AH=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_SECURITY=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_AH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_MH=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_TARGET_HL=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_SECURITY=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_TARGET_MASQUERADE=m +CONFIG_NF_TABLES_BRIDGE=y +CONFIG_RDS=m +CONFIG_RDS_RDMA=m +CONFIG_RDS_TCP=m CONFIG_L2TP=m CONFIG_L2TP_DEBUGFS=m -CONFIG_VLAN_8021Q=y +CONFIG_L2TP_V3=y +CONFIG_L2TP_IP=m +CONFIG_L2TP_ETH=m +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y CONFIG_NET_SCHED=y CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_MULTIQ=m CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFB=m CONFIG_NET_SCH_SFQ=m CONFIG_NET_SCH_TEQL=m CONFIG_NET_SCH_TBF=m CONFIG_NET_SCH_GRED=m CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_MQPRIO=m 
+CONFIG_NET_SCH_CHOKE=m +CONFIG_NET_SCH_QFQ=m +CONFIG_NET_SCH_CODEL=m +CONFIG_NET_SCH_FQ_CODEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_PLUG=m +CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_TCINDEX=m CONFIG_NET_CLS_ROUTE4=m CONFIG_NET_CLS_FW=m CONFIG_NET_CLS_U32=m +CONFIG_CLS_U32_PERF=y CONFIG_CLS_U32_MARK=y CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_FLOW=m +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=y +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_GACT=m +CONFIG_GACT_PROB=y +CONFIG_NET_ACT_MIRRED=m +CONFIG_NET_ACT_IPT=m +CONFIG_NET_ACT_NAT=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_SIMP=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_CSUM=m +CONFIG_DNS_RESOLVER=y +CONFIG_OPENVSWITCH=m +CONFIG_VSOCKETS=m +CONFIG_VIRTIO_VSOCKETS=m +CONFIG_NETLINK_DIAG=m +CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_NET_PKTGEN=m CONFIG_DEVTMPFS=y +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=0 +CONFIG_CONNECTOR=y +CONFIG_ZRAM=m CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_DRBD=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_VIRTIO_BLK=y +CONFIG_BLK_DEV_RBD=m +CONFIG_BLK_DEV_NVME=m +CONFIG_ENCLOSURE_SERVICES=m +CONFIG_GENWQE=m +CONFIG_RAID_ATTRS=m CONFIG_SCSI=y -# CONFIG_SCSI_MQ_DEFAULT is not set CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=y -CONFIG_BLK_DEV_SR=y -CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=y +CONFIG_CHR_DEV_SCH=m +CONFIG_SCSI_ENCLOSURE=m CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SAS_LIBSAS=m +CONFIG_SCSI_SRP_ATTRS=m +CONFIG_ISCSI_TCP=m +CONFIG_SCSI_DEBUG=m CONFIG_ZFCP=y -CONFIG_SCSI_VIRTIO=y +CONFIG_SCSI_VIRTIO=m +CONFIG_SCSI_DH=y +CONFIG_SCSI_DH_RDAC=m +CONFIG_SCSI_DH_HP_SW=m +CONFIG_SCSI_DH_EMC=m +CONFIG_SCSI_DH_ALUA=m +CONFIG_SCSI_OSD_INITIATOR=m +CONFIG_SCSI_OSD_ULD=m CONFIG_MD=y +CONFIG_BLK_DEV_MD=y CONFIG_MD_LINEAR=m CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_DM=y +CONFIG_MD_FAULTY=m +CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_MIRROR=m CONFIG_DM_LOG_USERSPACE=m CONFIG_DM_RAID=m @@ -125,71 +430,216 @@ CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m CONFIG_DM_MULTIPATH_ST=m +CONFIG_DM_DELAY=m CONFIG_DM_UEVENT=y +CONFIG_DM_FLAKEY=m CONFIG_DM_VERITY=m CONFIG_DM_SWITCH=m CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_DUMMY=m CONFIG_EQUALIZER=m +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m CONFIG_TUN=m -CONFIG_VIRTIO_NET=y -# CONFIG_NET_VENDOR_ALACRITECH is not set -# CONFIG_NET_VENDOR_AURORA is not set -# CONFIG_NET_VENDOR_CORTINA is not set -# CONFIG_NET_VENDOR_SOLARFLARE is not set -# CONFIG_NET_VENDOR_SOCIONEXT is not set -# CONFIG_NET_VENDOR_SYNOPSYS is not set -# CONFIG_INPUT is not set +CONFIG_VETH=m +CONFIG_VIRTIO_NET=m +CONFIG_NLMON=m +# CONFIG_NET_VENDOR_ARC is not set +# CONFIG_NET_VENDOR_CHELSIO is not set +# CONFIG_NET_VENDOR_INTEL is not set +# CONFIG_NET_VENDOR_MARVELL is not set +CONFIG_MLX4_EN=m +CONFIG_MLX5_CORE=m +CONFIG_MLX5_CORE_EN=y +# CONFIG_NET_VENDOR_NATSEMI is not set +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPTP=m +CONFIG_PPPOL2TP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_ISM=m +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_DEVKMEM=y 
+CONFIG_LEGACY_PTY_COUNT=0 +CONFIG_HW_RANDOM_VIRTIO=m CONFIG_RAW_DRIVER=m -CONFIG_VIRTIO_BALLOON=y +CONFIG_HANGCHECK_TIMER=m +CONFIG_TN3270_FS=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_SOFT_WATCHDOG=m +CONFIG_DIAG288_WATCHDOG=m +CONFIG_DRM=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_MLX4_INFINIBAND=m +CONFIG_MLX5_INFINIBAND=m +CONFIG_VFIO=m +CONFIG_VFIO_PCI=m +CONFIG_VFIO_MDEV=m +CONFIG_VFIO_MDEV_DEVICE=m +CONFIG_VIRTIO_PCI=m +CONFIG_VIRTIO_BALLOON=m +CONFIG_VIRTIO_INPUT=y +CONFIG_S390_AP_IOMMU=y +CONFIG_S390_CCW_IOMMU=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y +CONFIG_JBD2_DEBUG=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_SECURITY=y +CONFIG_JFS_STATISTICS=y CONFIG_XFS_FS=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y +CONFIG_GFS2_FS=m +CONFIG_GFS2_FS_LOCKING_DLM=y +CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y +CONFIG_NILFS2_FS=m +CONFIG_FS_DAX=y +CONFIG_EXPORTFS_BLOCK_OPS=y +CONFIG_FS_ENCRYPTION=y CONFIG_FANOTIFY=y +CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y +CONFIG_QUOTA_NETLINK_INTERFACE=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=y +CONFIG_CUSE=m +CONFIG_OVERLAY_FS=m +CONFIG_FSCACHE=m +CONFIG_CACHEFILES=m +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_UDF_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +CONFIG_NTFS_RW=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y -# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_CONFIGFS_FS=m +CONFIG_ECRYPT_FS=m +CONFIG_CRAMFS=m +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_ROMFS_FS=m +CONFIG_NFS_FS=m +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=m +CONFIG_NFS_SWAP=y +CONFIG_NFSD=m +CONFIG_NFSD_V3_ACL=y +CONFIG_NFSD_V4=y +CONFIG_NFSD_V4_SECURITY_LABEL=y +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_STATS2=y +CONFIG_CIFS_WEAK_PW_HASH=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +# CONFIG_CIFS_DEBUG is not set +CONFIG_CIFS_DFS_UPCALL=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_ASCII=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_UTF8=m +CONFIG_DLM=m +CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_GDB_SCRIPTS=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +CONFIG_UNUSED_SYMBOLS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_LATENCYTOP=y +CONFIG_SCHED_TRACER=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_STACK_TRACER=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_HIST_TRIGGERS=y +CONFIG_LKDTM=m +CONFIG_PERCPU_TEST=m +CONFIG_ATOMIC64_SELFTEST=y +CONFIG_TEST_BPF=m +CONFIG_BUG_ON_DATA_CORRUPTION=y +CONFIG_S390_PTDUMP=y +CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=y +CONFIG_ENCRYPTED_KEYS=m +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_INTEGRITY_SIGNATURE=y +CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y +CONFIG_IMA=y +CONFIG_IMA_DEFAULT_HASH_SHA256=y +CONFIG_IMA_WRITE_POLICY=y +CONFIG_IMA_APPRAISE=y +CONFIG_CRYPTO_FIPS=y +CONFIG_CRYPTO_DH=m +CONFIG_CRYPTO_ECDH=m +CONFIG_CRYPTO_USER=m +# 
CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set +CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_CFB=m -CONFIG_CRYPTO_CTS=m +CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_OFB=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_XTS=m -CONFIG_CRYPTO_CMAC=m +CONFIG_CRYPTO_KEYWRAP=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m +CONFIG_CRYPTO_AES_TI=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAMELLIA=m CONFIG_CRYPTO_CAST5=m @@ -199,16 +649,16 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_SM4=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_842=m CONFIG_CRYPTO_LZ4=m CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_ANSI_CPRNG=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m +CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -217,38 +667,14 @@ CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRC7=m -# CONFIG_XZ_DEC_X86 is not set -# CONFIG_XZ_DEC_POWERPC is not set -# CONFIG_XZ_DEC_IA64 is not set -# CONFIG_XZ_DEC_ARM is not set -# CONFIG_XZ_DEC_ARMTHUMB is not set -# CONFIG_XZ_DEC_SPARC is not set -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_GDB_SCRIPTS=y -CONFIG_UNUSED_SYMBOLS=y -CONFIG_DEBUG_SECTION_MISMATCH=y -CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_PAGEALLOC=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_PANIC_ON_OOPS=y -CONFIG_PROVE_LOCKING=y -CONFIG_LOCK_STAT=y -CONFIG_DEBUG_LOCKDEP=y -CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_SG=y -CONFIG_DEBUG_NOTIFIERS=y -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -# CONFIG_RUNTIME_TESTING_MENU is not set -CONFIG_S390_PTDUMP=y +CONFIG_CRC8=m +CONFIG_CORDIC=m +CONFIG_CMM=m +CONFIG_APPLDATA_BASE=y +CONFIG_KVM=m +CONFIG_KVM_S390_UCONTROL=y +CONFIG_VHOST_NET=m +CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig deleted file mode 100644 index 09aa5cb14873..000000000000 --- a/arch/s390/configs/performance_defconfig +++ /dev/null @@ -1,678 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_AUDIT=y -CONFIG_NO_HZ_IDLE=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_DELAY_ACCT=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_NUMA_BALANCING=y -# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set -CONFIG_MEMCG=y -CONFIG_MEMCG_SWAP=y -CONFIG_BLK_CGROUP=y -CONFIG_CFS_BANDWIDTH=y -CONFIG_RT_GROUP_SCHED=y -CONFIG_CGROUP_PIDS=y -CONFIG_CGROUP_FREEZER=y -CONFIG_CGROUP_HUGETLB=y -CONFIG_CPUSETS=y -CONFIG_CGROUP_DEVICE=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_PERF=y -CONFIG_NAMESPACES=y -CONFIG_USER_NS=y -CONFIG_SCHED_AUTOGROUP=y 
-CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -# CONFIG_SYSFS_SYSCALL is not set -CONFIG_CHECKPOINT_RESTORE=y -CONFIG_BPF_SYSCALL=y -CONFIG_USERFAULTFD=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_OPROFILE=m -CONFIG_KPROBES=y -CONFIG_JUMP_LABEL=y -CONFIG_MODULES=y -CONFIG_MODULE_FORCE_LOAD=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_MODULE_SIG=y -CONFIG_MODULE_SIG_SHA256=y -CONFIG_BLK_DEV_INTEGRITY=y -CONFIG_BLK_DEV_THROTTLING=y -CONFIG_BLK_WBT=y -CONFIG_BLK_WBT_SQ=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_IBM_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_CFQ_GROUP_IOSCHED=y -CONFIG_DEFAULT_DEADLINE=y -CONFIG_LIVEPATCH=y -CONFIG_TUNE_ZEC12=y -CONFIG_NR_CPUS=512 -CONFIG_NUMA=y -CONFIG_HZ_100=y -CONFIG_KEXEC_FILE=y -CONFIG_KEXEC_VERIFY_SIG=y -CONFIG_EXPOLINE=y -CONFIG_EXPOLINE_AUTO=y -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTREMOVE=y -CONFIG_KSM=y -CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y -CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -CONFIG_ZBUD=m -CONFIG_ZSMALLOC=m -CONFIG_ZSMALLOC_STAT=y -CONFIG_DEFERRED_STRUCT_PAGE_INIT=y -CONFIG_IDLE_PAGE_TRACKING=y -CONFIG_PCI=y -CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_S390=y -CONFIG_CHSC_SCH=y -CONFIG_VFIO_AP=m -CONFIG_CRASH_DUMP=y -CONFIG_BINFMT_MISC=m -CONFIG_HIBERNATION=y -CONFIG_PM_DEBUG=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_PACKET_DIAG=m -CONFIG_UNIX=y -CONFIG_UNIX_DIAG=m -CONFIG_XFRM_USER=m -CONFIG_NET_KEY=m -CONFIG_SMC=m -CONFIG_SMC_DIAG=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE_DEMUX=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -CONFIG_NET_IPVTI=m -CONFIG_INET_AH=m -CONFIG_INET_ESP=m -CONFIG_INET_IPCOMP=m -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m -CONFIG_INET_DIAG=m -CONFIG_INET_UDP_DIAG=m -CONFIG_TCP_CONG_ADVANCED=y -CONFIG_TCP_CONG_HSTCP=m -CONFIG_TCP_CONG_HYBLA=m -CONFIG_TCP_CONG_SCALABLE=m -CONFIG_TCP_CONG_LP=m -CONFIG_TCP_CONG_VENO=m -CONFIG_TCP_CONG_YEAH=m -CONFIG_TCP_CONG_ILLINOIS=m -CONFIG_IPV6_ROUTER_PREF=y -CONFIG_INET6_AH=m -CONFIG_INET6_ESP=m -CONFIG_INET6_IPCOMP=m -CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_TRANSPORT=m -CONFIG_INET6_XFRM_MODE_TUNNEL=m -CONFIG_INET6_XFRM_MODE_BEET=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -CONFIG_IPV6_VTI=m -CONFIG_IPV6_SIT=m -CONFIG_IPV6_GRE=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_NETFILTER=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_TIMEOUT=y -CONFIG_NF_CONNTRACK_TIMESTAMP=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_NETBIOS_NS=m -CONFIG_NF_CONNTRACK_SNMP=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NF_CT_NETLINK=m -CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NF_TABLES=m -CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m -CONFIG_NFT_LOG=m -CONFIG_NFT_LIMIT=m -CONFIG_NFT_NAT=m -CONFIG_NFT_COMPAT=m -CONFIG_NFT_HASH=m -CONFIG_NETFILTER_XT_SET=m -CONFIG_NETFILTER_XT_TARGET_AUDIT=m -CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m 
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_CT=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_HMARK=m -CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m -CONFIG_NETFILTER_XT_TARGET_LOG=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_TEE=m -CONFIG_NETFILTER_XT_TARGET_TPROXY=m -CONFIG_NETFILTER_XT_TARGET_TRACE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m -CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_CLUSTER=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m -CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_CPU=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_IPRANGE=m -CONFIG_NETFILTER_XT_MATCH_IPVS=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_NFACCT=m -CONFIG_NETFILTER_XT_MATCH_OSF=m -CONFIG_NETFILTER_XT_MATCH_OWNER=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_RATEEST=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_RECENT=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_TIME=m -CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_IP_SET=m -CONFIG_IP_SET_BITMAP_IP=m -CONFIG_IP_SET_BITMAP_IPMAC=m -CONFIG_IP_SET_BITMAP_PORT=m -CONFIG_IP_SET_HASH_IP=m -CONFIG_IP_SET_HASH_IPPORT=m -CONFIG_IP_SET_HASH_IPPORTIP=m -CONFIG_IP_SET_HASH_IPPORTNET=m -CONFIG_IP_SET_HASH_NETPORTNET=m -CONFIG_IP_SET_HASH_NET=m -CONFIG_IP_SET_HASH_NETNET=m -CONFIG_IP_SET_HASH_NETPORT=m -CONFIG_IP_SET_HASH_NETIFACE=m -CONFIG_IP_SET_LIST_SET=m -CONFIG_IP_VS=m -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m -CONFIG_IP_VS_FTP=m -CONFIG_IP_VS_PE_SIP=m -CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=y -CONFIG_NFT_CHAIN_NAT_IPV4=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_SECURITY=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_IP6_NF_IPTABLES=m 
-CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_IP6_NF_SECURITY=m -CONFIG_IP6_NF_NAT=m -CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=y -CONFIG_RDS=m -CONFIG_RDS_RDMA=m -CONFIG_RDS_TCP=m -CONFIG_L2TP=m -CONFIG_L2TP_DEBUGFS=m -CONFIG_L2TP_V3=y -CONFIG_L2TP_IP=m -CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m -CONFIG_VLAN_8021Q_GVRP=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_HFSC=m -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_MULTIQ=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFB=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_DRR=m -CONFIG_NET_SCH_MQPRIO=m -CONFIG_NET_SCH_CHOKE=m -CONFIG_NET_SCH_QFQ=m -CONFIG_NET_SCH_CODEL=m -CONFIG_NET_SCH_FQ_CODEL=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_SCH_PLUG=m -CONFIG_NET_CLS_BASIC=m -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_CLS_U32_PERF=y -CONFIG_CLS_U32_MARK=y -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_FLOW=m -CONFIG_NET_CLS_CGROUP=y -CONFIG_NET_CLS_BPF=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_ACT_POLICE=m -CONFIG_NET_ACT_GACT=m -CONFIG_GACT_PROB=y -CONFIG_NET_ACT_MIRRED=m -CONFIG_NET_ACT_IPT=m -CONFIG_NET_ACT_NAT=m -CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_ACT_SIMP=m -CONFIG_NET_ACT_SKBEDIT=m -CONFIG_NET_ACT_CSUM=m -CONFIG_DNS_RESOLVER=y -CONFIG_OPENVSWITCH=m -CONFIG_VSOCKETS=m -CONFIG_VIRTIO_VSOCKETS=m -CONFIG_NETLINK_DIAG=m -CONFIG_CGROUP_NET_PRIO=y -CONFIG_BPF_JIT=y -CONFIG_NET_PKTGEN=m -CONFIG_DEVTMPFS=y -CONFIG_DMA_CMA=y -CONFIG_CMA_SIZE_MBYTES=0 -CONFIG_CONNECTOR=y -CONFIG_ZRAM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_DRBD=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_VIRTIO_BLK=y -CONFIG_BLK_DEV_RBD=m -CONFIG_BLK_DEV_NVME=m -CONFIG_ENCLOSURE_SERVICES=m -CONFIG_GENWQE=m -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=y -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SPI_ATTRS=m -CONFIG_SCSI_FC_ATTRS=y -CONFIG_SCSI_SAS_LIBSAS=m -CONFIG_SCSI_SRP_ATTRS=m -CONFIG_ISCSI_TCP=m -CONFIG_SCSI_DEBUG=m -CONFIG_ZFCP=y -CONFIG_SCSI_VIRTIO=m -CONFIG_SCSI_DH=y -CONFIG_SCSI_DH_RDAC=m -CONFIG_SCSI_DH_HP_SW=m -CONFIG_SCSI_DH_EMC=m -CONFIG_SCSI_DH_ALUA=m -CONFIG_SCSI_OSD_INITIATOR=m -CONFIG_SCSI_OSD_ULD=m -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_MULTIPATH=m -CONFIG_MD_FAULTY=m -CONFIG_BLK_DEV_DM=m -CONFIG_DM_CRYPT=m -CONFIG_DM_SNAPSHOT=m -CONFIG_DM_THIN_PROVISIONING=m -CONFIG_DM_MIRROR=m -CONFIG_DM_LOG_USERSPACE=m -CONFIG_DM_RAID=m -CONFIG_DM_ZERO=m -CONFIG_DM_MULTIPATH=m -CONFIG_DM_MULTIPATH_QL=m -CONFIG_DM_MULTIPATH_ST=m -CONFIG_DM_DELAY=m -CONFIG_DM_UEVENT=y -CONFIG_DM_FLAKEY=m -CONFIG_DM_VERITY=m -CONFIG_DM_SWITCH=m -CONFIG_NETDEVICES=y -CONFIG_BONDING=m -CONFIG_DUMMY=m -CONFIG_EQUALIZER=m -CONFIG_IFB=m -CONFIG_MACVLAN=m -CONFIG_MACVTAP=m -CONFIG_VXLAN=m -CONFIG_TUN=m -CONFIG_VETH=m -CONFIG_VIRTIO_NET=m -CONFIG_NLMON=m -# CONFIG_NET_VENDOR_ARC is not set -# 
CONFIG_NET_VENDOR_CHELSIO is not set -# CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_MARVELL is not set -CONFIG_MLX4_EN=m -CONFIG_MLX5_CORE=m -CONFIG_MLX5_CORE_EN=y -# CONFIG_NET_VENDOR_NATSEMI is not set -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_MPPE=m -CONFIG_PPPOE=m -CONFIG_PPTP=m -CONFIG_PPPOL2TP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_ISM=m -CONFIG_INPUT_EVDEV=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_LEGACY_PTY_COUNT=0 -CONFIG_HW_RANDOM_VIRTIO=m -CONFIG_RAW_DRIVER=m -CONFIG_HANGCHECK_TIMER=m -CONFIG_TN3270_FS=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_SOFT_WATCHDOG=m -CONFIG_DIAG288_WATCHDOG=m -CONFIG_DRM=y -CONFIG_DRM_VIRTIO_GPU=y -CONFIG_FRAMEBUFFER_CONSOLE=y -# CONFIG_HID is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_INFINIBAND=m -CONFIG_INFINIBAND_USER_ACCESS=m -CONFIG_MLX4_INFINIBAND=m -CONFIG_MLX5_INFINIBAND=m -CONFIG_VFIO=m -CONFIG_VFIO_PCI=m -CONFIG_VFIO_MDEV=m -CONFIG_VFIO_MDEV_DEVICE=m -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_BALLOON=m -CONFIG_VIRTIO_INPUT=y -CONFIG_S390_AP_IOMMU=y -CONFIG_EXT4_FS=y -CONFIG_EXT4_FS_POSIX_ACL=y -CONFIG_EXT4_FS_SECURITY=y -CONFIG_JBD2_DEBUG=y -CONFIG_JFS_FS=m -CONFIG_JFS_POSIX_ACL=y -CONFIG_JFS_SECURITY=y -CONFIG_JFS_STATISTICS=y -CONFIG_XFS_FS=y -CONFIG_XFS_QUOTA=y -CONFIG_XFS_POSIX_ACL=y -CONFIG_XFS_RT=y -CONFIG_GFS2_FS=m -CONFIG_GFS2_FS_LOCKING_DLM=y -CONFIG_OCFS2_FS=m -CONFIG_BTRFS_FS=y -CONFIG_BTRFS_FS_POSIX_ACL=y -CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y -CONFIG_EXPORTFS_BLOCK_OPS=y -CONFIG_FS_ENCRYPTION=y -CONFIG_FANOTIFY=y -CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y -CONFIG_QUOTA_NETLINK_INTERFACE=y -CONFIG_QFMT_V1=m -CONFIG_QFMT_V2=m -CONFIG_AUTOFS4_FS=m -CONFIG_FUSE_FS=y -CONFIG_CUSE=m -CONFIG_OVERLAY_FS=m -CONFIG_FSCACHE=m -CONFIG_CACHEFILES=m -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_UDF_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_NTFS_FS=m -CONFIG_NTFS_RW=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_HUGETLBFS=y -CONFIG_CONFIGFS_FS=m -CONFIG_ECRYPT_FS=m -CONFIG_CRAMFS=m -CONFIG_SQUASHFS=m -CONFIG_SQUASHFS_XATTR=y -CONFIG_SQUASHFS_LZO=y -CONFIG_SQUASHFS_XZ=y -CONFIG_ROMFS_FS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=m -CONFIG_NFS_SWAP=y -CONFIG_NFSD=m -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_NFSD_V4_SECURITY_LABEL=y -CONFIG_CIFS=m -CONFIG_CIFS_STATS=y -CONFIG_CIFS_STATS2=y -CONFIG_CIFS_WEAK_PW_HASH=y -CONFIG_CIFS_UPCALL=y -CONFIG_CIFS_XATTR=y -CONFIG_CIFS_POSIX=y -# CONFIG_CIFS_DEBUG is not set -CONFIG_CIFS_DFS_UPCALL=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y -CONFIG_GDB_SCRIPTS=y -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_MEMORY_INIT=y -CONFIG_PANIC_ON_OOPS=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_RCU_CPU_STALL_TIMEOUT=60 -CONFIG_LATENCYTOP=y -CONFIG_SCHED_TRACER=y -CONFIG_FTRACE_SYSCALLS=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_FUNCTION_PROFILER=y -CONFIG_HIST_TRIGGERS=y -CONFIG_LKDTM=m -CONFIG_PERCPU_TEST=m -CONFIG_ATOMIC64_SELFTEST=y -CONFIG_TEST_BPF=m -CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_S390_PTDUMP=y -CONFIG_PERSISTENT_KEYRINGS=y -CONFIG_BIG_KEYS=y -CONFIG_ENCRYPTED_KEYS=m -CONFIG_SECURITY=y 
-CONFIG_SECURITY_NETWORK=y -CONFIG_SECURITY_SELINUX=y -CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 -CONFIG_SECURITY_SELINUX_DISABLE=y -CONFIG_INTEGRITY_SIGNATURE=y -CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y -CONFIG_IMA=y -CONFIG_IMA_DEFAULT_HASH_SHA256=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_APPRAISE=y -CONFIG_CRYPTO_FIPS=y -CONFIG_CRYPTO_DH=m -CONFIG_CRYPTO_ECDH=m -CONFIG_CRYPTO_USER=m -# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set -CONFIG_CRYPTO_PCRYPT=m -CONFIG_CRYPTO_CRYPTD=m -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_VMAC=m -CONFIG_CRYPTO_CRC32=m -CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_RMD128=m -CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_RMD256=m -CONFIG_CRYPTO_RMD320=m -CONFIG_CRYPTO_SHA512=m -CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_TGR192=m -CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES_TI=m -CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_BLOWFISH=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_CAST5=m -CONFIG_CRYPTO_CAST6=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CRYPTO_KHAZAD=m -CONFIG_CRYPTO_SALSA20=m -CONFIG_CRYPTO_SEED=m -CONFIG_CRYPTO_SERPENT=m -CONFIG_CRYPTO_TEA=m -CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_842=m -CONFIG_CRYPTO_LZ4=m -CONFIG_CRYPTO_LZ4HC=m -CONFIG_CRYPTO_ANSI_CPRNG=m -CONFIG_CRYPTO_USER_API_HASH=m -CONFIG_CRYPTO_USER_API_SKCIPHER=m -CONFIG_CRYPTO_USER_API_RNG=m -CONFIG_CRYPTO_USER_API_AEAD=m -CONFIG_ZCRYPT=m -CONFIG_PKEY=m -CONFIG_CRYPTO_PAES_S390=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=y -CONFIG_CRC7=m -CONFIG_CRC8=m -CONFIG_CORDIC=m -CONFIG_CMM=m -CONFIG_APPLDATA_BASE=y -CONFIG_KVM=m -CONFIG_KVM_S390_UCONTROL=y -CONFIG_VHOST_NET=m -CONFIG_VHOST_VSOCK=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 7dc7f58c4287..d92bab844b73 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -24,7 +24,6 @@ CONFIG_CRASH_DUMP=y # CONFIG_SECCOMP is not set CONFIG_NET=y # CONFIG_IUCV is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_BLK_DEV_RAM=y # CONFIG_BLK_DEV_XPRAM is not set diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 86aed30fad3a..eeeb6a7737a4 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -137,7 +137,7 @@ static struct shash_alg ghash_alg = { static int __init ghash_mod_init(void) { if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH)) - return -EOPNOTSUPP; + return -ENODEV; return crypto_register_shash(&ghash_alg); } diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 12cca467af7d..d977643fa627 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -824,7 +824,7 @@ static int __init prng_init(void) /* check if the CPU has a PRNG */ if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) - return -EOPNOTSUPP; + return -ENODEV; /* check if TRNG subfunction is available */ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) @@ -837,7 +837,7 @@ static int __init prng_init(void) if (prng_mode == PRNG_MODE_SHA512) { pr_err("The prng module cannot " "start in SHA-512 mode\n"); - return -EOPNOTSUPP; + return -ENODEV; } prng_mode = PRNG_MODE_TDES; } else diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 009572e8276d..7c15542d3685 100644 --- a/arch/s390/crypto/sha1_s390.c +++ 
b/arch/s390/crypto/sha1_s390.c @@ -86,7 +86,7 @@ static struct shash_alg alg = { static int __init sha1_s390_init(void) { if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1)) - return -EOPNOTSUPP; + return -ENODEV; return crypto_register_shash(&alg); } diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 62833a1d8724..af7505148f80 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -117,7 +117,7 @@ static int __init sha256_s390_init(void) int ret; if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) - return -EOPNOTSUPP; + return -ENODEV; ret = crypto_register_shash(&sha256_alg); if (ret < 0) goto out; diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index be589c340d15..ad29db085a18 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -127,7 +127,7 @@ static int __init init(void) int ret; if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512)) - return -EOPNOTSUPP; + return -ENODEV; if ((ret = crypto_register_shash(&sha512_alg)) < 0) goto out; if ((ret = crypto_register_shash(&sha384_alg)) < 0) diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index c10d2ee2dfda..01936fdfaddb 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -11,6 +11,7 @@ #define _ASM_S390_AIRQ_H #include <linux/bit_spinlock.h> +#include <linux/dma-mapping.h> struct airq_struct { struct hlist_node list; /* Handler queueing. */ @@ -29,6 +30,7 @@ void unregister_adapter_interrupt(struct airq_struct *airq); /* Adapter interrupt bit vector */ struct airq_iv { unsigned long *vector; /* Adapter interrupt bit vector */ + dma_addr_t vector_dma; /* Adapter interrupt bit vector dma */ unsigned long *avail; /* Allocation bit mask for the bit vector */ unsigned long *bitlock; /* Lock bit mask for the bit vector */ unsigned long *ptr; /* Pointer associated with each bit */ diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index fd20ab5d4cf7..491ad53a0d4e 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -84,9 +84,9 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new) #define ATOMIC64_INIT(i) { (i) } -static inline long atomic64_read(const atomic64_t *v) +static inline s64 atomic64_read(const atomic64_t *v) { - long c; + s64 c; asm volatile( " lg %0,%1\n" @@ -94,49 +94,49 @@ static inline long atomic64_read(const atomic64_t *v) return c; } -static inline void atomic64_set(atomic64_t *v, long i) +static inline void atomic64_set(atomic64_t *v, s64 i) { asm volatile( " stg %1,%0\n" : "=Q" (v->counter) : "d" (i)); } -static inline long atomic64_add_return(long i, atomic64_t *v) +static inline s64 atomic64_add_return(s64 i, atomic64_t *v) { - return __atomic64_add_barrier(i, &v->counter) + i; + return __atomic64_add_barrier(i, (long *)&v->counter) + i; } -static inline long atomic64_fetch_add(long i, atomic64_t *v) +static inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) { - return __atomic64_add_barrier(i, &v->counter); + return __atomic64_add_barrier(i, (long *)&v->counter); } -static inline void atomic64_add(long i, atomic64_t *v) +static inline void atomic64_add(s64 i, atomic64_t *v) { #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES if (__builtin_constant_p(i) && (i > -129) && (i < 128)) { - __atomic64_add_const(i, &v->counter); + __atomic64_add_const(i, (long *)&v->counter); return; } #endif - __atomic64_add(i, &v->counter); + __atomic64_add(i, (long *)&v->counter); } #define atomic64_xchg(v, 
new) (xchg(&((v)->counter), new)) -static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) +static inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { - return __atomic64_cmpxchg(&v->counter, old, new); + return __atomic64_cmpxchg((long *)&v->counter, old, new); } #define ATOMIC64_OPS(op) \ -static inline void atomic64_##op(long i, atomic64_t *v) \ +static inline void atomic64_##op(s64 i, atomic64_t *v) \ { \ - __atomic64_##op(i, &v->counter); \ + __atomic64_##op(i, (long *)&v->counter); \ } \ -static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +static inline long atomic64_fetch_##op(s64 i, atomic64_t *v) \ { \ - return __atomic64_##op##_barrier(i, &v->counter); \ + return __atomic64_##op##_barrier(i, (long *)&v->counter); \ } ATOMIC64_OPS(and) @@ -145,8 +145,8 @@ ATOMIC64_OPS(xor) #undef ATOMIC64_OPS -#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long)(_i), _v) -#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long)(_i), _v) -#define atomic64_sub(_i, _v) atomic64_add(-(long)(_i), _v) +#define atomic64_sub_return(_i, _v) atomic64_add_return(-(s64)(_i), _v) +#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(s64)(_i), _v) +#define atomic64_sub(_i, _v) atomic64_add(-(s64)(_i), _v) #endif /* __ARCH_S390_ATOMIC__ */ diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index a29dd430fb40..865ce1cb86d5 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -226,6 +226,10 @@ extern int ccw_device_enable_console(struct ccw_device *); extern void ccw_device_wait_idle(struct ccw_device *); extern int ccw_device_force_console(struct ccw_device *); +extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size); +extern void ccw_device_dma_free(struct ccw_device *cdev, + void *cpu_addr, size_t size); + int ccw_device_siosl(struct ccw_device *); extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *); diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 1727180e8ca1..b5bfb3123cb1 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -7,6 +7,7 @@ #include <linux/spinlock.h> #include <linux/bitops.h> +#include <linux/genalloc.h> #include <asm/types.h> #define LPM_ANYPATH 0xff @@ -264,6 +265,36 @@ struct ciw { #define CIW_TYPE_RNI 0x2 /* read node identifier */ /* + * Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands" + */ + +#define ND_VALIDITY_VALID 0 +#define ND_VALIDITY_OUTDATED 1 +#define ND_VALIDITY_INVALID 2 + +struct node_descriptor { + /* Flags. */ + union { + struct { + u32 validity:3; + u32 reserved:5; + } __packed; + u8 byte0; + } __packed; + + /* Node parameters. */ + u32 params:24; + + /* Node ID. */ + char type[6]; + char model[3]; + char manufacturer[3]; + char plant[2]; + char seq[12]; + u16 tag; +} __packed; + +/* * Flags used as input parameters for do_IO() */ #define DOIO_ALLOW_SUSPEND 0x0001 /* allow for channel prog. 
suspend */ @@ -328,6 +359,16 @@ static inline u8 pathmask_to_pos(u8 mask) void channel_subsystem_reinit(void); extern void css_schedule_reprobe(void); +extern void *cio_dma_zalloc(size_t size); +extern void cio_dma_free(void *cpu_addr, size_t size); +extern struct device *cio_get_dma_css_dev(void); + +void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev, + size_t size); +void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size); +void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev); +struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages); + /* Function from drivers/s390/cio/chsc.c */ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index 3bda757317cf..0cf6b53587db 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -112,13 +112,8 @@ union ctlreg2 { }; }; -#ifdef CONFIG_SMP -# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) -# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) -#else -# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit) -# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit) -#endif +#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit) +#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit) #endif /* __ASSEMBLY__ */ #endif /* __ASM_CTL_REG_H */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index c305d39f5016..310134015541 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -107,13 +107,37 @@ void debug_unregister(debug_info_t *id); void debug_set_level(debug_info_t *id, int new_level); void debug_set_critical(void); + void debug_stop_all(void); +/** + * debug_level_enabled() - Returns true if debug events for the specified + * level would be logged. Otherwise returns false. + * + * @id: handle for debug log + * @level: debug level + * + * Return: + * - %true if level is less or equal to the current debug level. 
+ */ static inline bool debug_level_enabled(debug_info_t *id, int level) { return level <= id->level; } +/** + * debug_event() - writes binary debug entry to active debug area + * (if level <= actual debug level) + * + * @id: handle for debug log + * @level: debug level + * @data: pointer to data for debug entry + * @length: length of data in bytes + * + * Return: + * - Address of written debug entry + * - %NULL if error + */ static inline debug_entry_t *debug_event(debug_info_t *id, int level, void *data, int length) { @@ -122,6 +146,18 @@ static inline debug_entry_t *debug_event(debug_info_t *id, int level, return debug_event_common(id, level, data, length); } +/** + * debug_int_event() - writes unsigned integer debug entry to active debug area + * (if level <= actual debug level) + * + * @id: handle for debug log + * @level: debug level + * @tag: integer value for debug entry + * + * Return: + * - Address of written debug entry + * - %NULL if error + */ static inline debug_entry_t *debug_int_event(debug_info_t *id, int level, unsigned int tag) { @@ -132,6 +168,18 @@ static inline debug_entry_t *debug_int_event(debug_info_t *id, int level, return debug_event_common(id, level, &t, sizeof(unsigned int)); } +/** + * debug_long_event() - writes unsigned long debug entry to active debug area + * (if level <= actual debug level) + * + * @id: handle for debug log + * @level: debug level + * @tag: long integer value for debug entry + * + * Return: + * - Address of written debug entry + * - %NULL if error + */ static inline debug_entry_t *debug_long_event(debug_info_t *id, int level, unsigned long tag) { @@ -142,6 +190,18 @@ static inline debug_entry_t *debug_long_event(debug_info_t *id, int level, return debug_event_common(id, level, &t, sizeof(unsigned long)); } +/** + * debug_text_event() - writes string debug entry in ascii format to active + * debug area (if level <= actual debug level) + * + * @id: handle for debug log + * @level: debug level + * @txt: string for debug entry + * + * Return: + * - Address of written debug entry + * - %NULL if error + */ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level, const char *txt) { @@ -152,12 +212,28 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level, /* * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are - * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details! */ extern debug_entry_t * __debug_sprintf_event(debug_info_t *id, int level, char *string, ...) __attribute__ ((format(printf, 3, 4))); +/** + * debug_sprintf_event() - writes debug entry with format string + * and varargs (longs) to active debug area + * (if level $<=$ actual debug level). + * + * @_id: handle for debug log + * @_level: debug level + * @_fmt: format string for debug entry + * @...: varargs used as in sprintf() + * + * Return: + * - Address of written debug entry + * - %NULL if error + * + * floats and long long datatypes cannot be used as varargs. + */ #define debug_sprintf_event(_id, _level, _fmt, ...) \ ({ \ debug_entry_t *__ret; \ @@ -172,6 +248,20 @@ __debug_sprintf_event(debug_info_t *id, int level, char *string, ...) 
__ret; \ }) +/** + * debug_exception() - writes binary debug entry to active debug area + * (if level <= actual debug level) + * and switches to next debug area + * + * @id: handle for debug log + * @level: debug level + * @data: pointer to data for debug entry + * @length: length of data in bytes + * + * Return: + * - Address of written debug entry + * - %NULL if error + */ static inline debug_entry_t *debug_exception(debug_info_t *id, int level, void *data, int length) { @@ -180,6 +270,19 @@ static inline debug_entry_t *debug_exception(debug_info_t *id, int level, return debug_exception_common(id, level, data, length); } +/** + * debug_int_exception() - writes unsigned int debug entry to active debug area + * (if level <= actual debug level) + * and switches to next debug area + * + * @id: handle for debug log + * @level: debug level + * @tag: integer value for debug entry + * + * Return: + * - Address of written debug entry + * - %NULL if error + */ static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level, unsigned int tag) { @@ -190,6 +293,19 @@ static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level, return debug_exception_common(id, level, &t, sizeof(unsigned int)); } +/** + * debug_long_exception() - writes long debug entry to active debug area + * (if level <= actual debug level) + * and switches to next debug area + * + * @id: handle for debug log + * @level: debug level + * @tag: long integer value for debug entry + * + * Return: + * - Address of written debug entry + * - %NULL if error + */ static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level, unsigned long tag) { @@ -200,6 +316,20 @@ static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level, return debug_exception_common(id, level, &t, sizeof(unsigned long)); } +/** + * debug_text_exception() - writes string debug entry in ascii format to active + * debug area (if level <= actual debug level) + * and switches to next debug area + * area + * + * @id: handle for debug log + * @level: debug level + * @txt: string for debug entry + * + * Return: + * - Address of written debug entry + * - %NULL if error + */ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level, const char *txt) { @@ -210,12 +340,30 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level, /* * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are - * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details! */ extern debug_entry_t * __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) __attribute__ ((format(printf, 3, 4))); + +/** + * debug_sprintf_exception() - writes debug entry with format string and + * varargs (longs) to active debug area + * (if level <= actual debug level) + * and switches to next debug area. + * + * @_id: handle for debug log + * @_level: debug level + * @_fmt: format string for debug entry + * @...: varargs used as in sprintf() + * + * Return: + * - Address of written debug entry + * - %NULL if error + * + * floats and long long datatypes cannot be used as varargs. + */ #define debug_sprintf_exception(_id, _level, _fmt, ...) \ ({ \ debug_entry_t *__ret; \ @@ -231,6 +379,7 @@ __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) 
}) int debug_register_view(debug_info_t *id, struct debug_view *view); + int debug_unregister_view(debug_info_t *id, struct debug_view *view); /* diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index e78cda94456b..68c476b20b57 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -59,6 +59,18 @@ static inline int test_facility(unsigned long nr) return __test_facility(nr, &S390_lowcore.stfle_fac_list); } +static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size) +{ + register unsigned long reg0 asm("0") = size - 1; + + asm volatile( + ".insn s,0xb2b00000,0(%1)" /* stfle */ + : "+d" (reg0) + : "a" (stfle_fac_list) + : "memory", "cc"); + return reg0; +} + /** * stfle - Store facility list extended * @stfle_fac_list: array where facility list can be stored @@ -75,13 +87,8 @@ static inline void __stfle(u64 *stfle_fac_list, int size) memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4); if (S390_lowcore.stfl_fac_list & 0x01000000) { /* More facility bits available with stfle */ - register unsigned long reg0 asm("0") = size - 1; - - asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */ - : "+d" (reg0) - : "a" (stfle_fac_list) - : "memory", "cc"); - nr = (reg0 + 1) * 8; /* # bytes stored by stfle */ + nr = __stfle_asm(stfle_fac_list, size); + nr = min_t(unsigned long, (nr + 1) * 8, size * 8); } memset((char *) stfle_fac_list + nr, 0, size * 8 - nr); } diff --git a/arch/s390/include/asm/idals.h b/arch/s390/include/asm/idals.h index 15578fd762f6..6fb7aced104a 100644 --- a/arch/s390/include/asm/idals.h +++ b/arch/s390/include/asm/idals.h @@ -122,8 +122,7 @@ idal_buffer_alloc(size_t size, int page_order) nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG; nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG; - ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *), - GFP_DMA | GFP_KERNEL); + ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL); if (ib == NULL) return ERR_PTR(-ENOMEM); ib->size = size; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2b00a3ebee08..abe60268335d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -18,6 +18,7 @@ #include <linux/kvm_host.h> #include <linux/kvm.h> #include <linux/seqlock.h> +#include <linux/module.h> #include <asm/debug.h> #include <asm/cpu.h> #include <asm/fpu/api.h> @@ -720,8 +721,14 @@ struct kvm_s390_cpu_model { unsigned short ibc; }; +struct kvm_s390_module_hook { + int (*hook)(struct kvm_vcpu *vcpu); + struct module *owner; +}; + struct kvm_s390_crypto { struct kvm_s390_crypto_cb *crycb; + struct kvm_s390_module_hook *pqap_hook; __u32 crycbd; __u8 aes_kw; __u8 dea_kw; @@ -905,7 +912,6 @@ extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); static inline void kvm_arch_hardware_disable(void) {} -static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h new file mode 100644 index 000000000000..3eb018508190 --- /dev/null +++ b/arch/s390/include/asm/mem_encrypt.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef S390_MEM_ENCRYPT_H__ +#define S390_MEM_ENCRYPT_H__ + +#ifndef __ASSEMBLY__ + +#define sme_me_mask 
0ULL + +static inline bool sme_active(void) { return false; } +extern bool sev_active(void); + +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + +#endif /* __ASSEMBLY__ */ + +#endif /* S390_MEM_ENCRYPT_H__ */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 305befd55326..a2399eff84ca 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -194,6 +194,11 @@ int zpci_init_iommu(struct zpci_dev *zdev); void zpci_destroy_iommu(struct zpci_dev *zdev); #ifdef CONFIG_PCI +static inline bool zpci_use_mio(struct zpci_dev *zdev) +{ + return static_branch_likely(&have_mio) && zdev->mio_capable; +} + /* Error handling and recovery */ void zpci_event_error(void *); void zpci_event_availability(void *); diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index ff81ed19c506..61cf9531f68f 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -143,14 +143,4 @@ static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc) return __zpci_set_irq_ctrl(ctl, isc, &iib); } -#ifdef CONFIG_PCI -static inline void enable_mio_ctl(void) -{ - if (static_branch_likely(&have_mio)) - __ctl_set_bit(2, 5); -} -#else /* CONFIG_PCI */ -static inline void enable_mio_ctl(void) {} -#endif /* CONFIG_PCI */ - #endif diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 0095ddb58ff6..50b4ce8cddfd 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -16,7 +16,7 @@ * per cpu area, use weak definitions to force the compiler to * generate external references. */ -#if defined(CONFIG_SMP) && defined(MODULE) +#if defined(MODULE) #define ARCH_NEEDS_WEAK_PER_CPU #endif diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9f0195d5fa16..9b274fcaacb6 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1270,14 +1270,8 @@ static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address) #define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) #define pte_unmap(pte) do { } while (0) -static inline bool gup_fast_permitted(unsigned long start, int nr_pages) +static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { - unsigned long len, end; - - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (end < start) - return false; return end <= current->mm->context.asce_limit; } #define gup_fast_permitted gup_fast_permitted diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index b0fcbc37b637..14883b1562e0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -36,6 +36,7 @@ #ifndef __ASSEMBLY__ +#include <linux/cpumask.h> #include <linux/linkage.h> #include <linux/irqflags.h> #include <asm/cpu.h> @@ -221,12 +222,6 @@ static __no_kasan_or_inline unsigned short stap(void) return cpu_address; } -/* - * Give up the time slice of the virtual PU. 
- */ -#define cpu_relax_yield cpu_relax_yield -void cpu_relax_yield(void); - #define cpu_relax() barrier() #define ECAG_CACHE_ATTRIBUTE 0 diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index f577c5f6031a..c563f8368b19 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -80,7 +80,6 @@ struct sclp_info { unsigned char has_gisaf : 1; unsigned char has_diag318 : 1; unsigned char has_sipl : 1; - unsigned char has_sipl_g2 : 1; unsigned char has_dirq : 1; unsigned int ibc; unsigned int mtid; diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 3907ead27ffa..b157a81fb977 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -9,9 +9,6 @@ #define __ASM_SMP_H #include <asm/sigp.h> - -#ifdef CONFIG_SMP - #include <asm/lowcore.h> #define raw_smp_processor_id() (S390_lowcore.cpu_nr) @@ -40,33 +37,6 @@ extern int smp_cpu_get_polarization(int cpu); extern void smp_fill_possible_mask(void); extern void smp_detect_cpus(void); -#else /* CONFIG_SMP */ - -#define smp_cpu_mtid 0 - -static inline void smp_call_ipl_cpu(void (*func)(void *), void *data) -{ - func(data); -} - -static inline void smp_call_online_cpu(void (*func)(void *), void *data) -{ - func(data); -} - -static inline void smp_emergency_stop(void) -{ -} - -static inline int smp_find_processor_id(u16 address) { return 0; } -static inline int smp_store_status(int cpu) { return 0; } -static inline int smp_vcpu_scheduled(int cpu) { return 1; } -static inline void smp_yield_cpu(int cpu) { } -static inline void smp_fill_possible_mask(void) { } -static inline void smp_detect_cpus(void) { } - -#endif /* CONFIG_SMP */ - static inline void smp_stop_cpu(void) { u16 pcpu = stap(); @@ -83,14 +53,9 @@ static inline int smp_get_base_cpu(int cpu) return cpu - (cpu % (smp_cpu_mtid + 1)); } -#ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); extern void __noreturn cpu_die(void); extern void __cpu_die(unsigned int cpu); extern int __cpu_disable(void); -#else -static inline int smp_rescan_cpus(void) { return 0; } -static inline void cpu_die(void) { } -#endif #endif /* __ASM_SMP_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 0a29588aa00b..c02bff33f6c7 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -20,11 +20,7 @@ extern int spin_retry; -#ifndef CONFIG_SMP -static inline bool arch_vcpu_is_preempted(int cpu) { return false; } -#else bool arch_vcpu_is_preempted(int cpu); -#endif #define vcpu_is_preempted arch_vcpu_is_preempted diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 8c840f0904f3..82703e03f35d 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -32,7 +32,6 @@ static inline void __tlb_flush_idte(unsigned long asce) : : "a" (opt), "a" (asce) : "cc"); } -#ifdef CONFIG_SMP void smp_ptlb_all(void); /* @@ -83,22 +82,6 @@ static inline void __tlb_flush_kernel(void) else __tlb_flush_global(); } -#else -#define __tlb_flush_global() __tlb_flush_local() - -/* - * Flush TLB entries for a specific ASCE on all CPUs. 
- */ -static inline void __tlb_flush_mm(struct mm_struct *mm) -{ - __tlb_flush_local(); -} - -static inline void __tlb_flush_kernel(void) -{ - __tlb_flush_local(); -} -#endif static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index 6eb2ef105d87..d827b5b9a32c 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -79,23 +79,4 @@ static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} -#ifdef CONFIG_KASAN -/* - * This disables KASAN checking when reading a value from another task's stack, - * since the other task could be running on another CPU and could have poisoned - * the stack in the meantime. - */ -#define READ_ONCE_TASK_STACK(task, x) \ -({ \ - unsigned long val; \ - if (task == current) \ - val = READ_ONCE(x); \ - else \ - val = READ_ONCE_NOCHECK(x); \ - val; \ -}) -#else -#define READ_ONCE_TASK_STACK(task, x) READ_ONCE(x) -#endif - #endif /* _ASM_S390_UNWIND_H */ diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h index 832be5c2584f..9ec86fae9980 100644 --- a/arch/s390/include/uapi/asm/dasd.h +++ b/arch/s390/include/uapi/asm/dasd.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* +/* * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> * Bugreports.to..: <Linux390@de.ibm.com> * Copyright IBM Corp. 1999, 2000 @@ -21,40 +21,40 @@ #define DASD_API_VERSION 6 -/* +/* * struct dasd_information2_t * represents any data about the device, which is visible to userspace. * including foramt and featueres. */ typedef struct dasd_information2_t { - unsigned int devno; /* S/390 devno */ - unsigned int real_devno; /* for aliases */ - unsigned int schid; /* S/390 subchannel identifier */ - unsigned int cu_type : 16; /* from SenseID */ - unsigned int cu_model : 8; /* from SenseID */ - unsigned int dev_type : 16; /* from SenseID */ - unsigned int dev_model : 8; /* from SenseID */ - unsigned int open_count; - unsigned int req_queue_len; - unsigned int chanq_len; /* length of chanq */ - char type[4]; /* from discipline.name, 'none' for unknown */ - unsigned int status; /* current device level */ - unsigned int label_block; /* where to find the VOLSER */ - unsigned int FBA_layout; /* fixed block size (like AIXVOL) */ - unsigned int characteristics_size; - unsigned int confdata_size; - char characteristics[64]; /* from read_device_characteristics */ - char configuration_data[256]; /* from read_configuration_data */ - unsigned int format; /* format info like formatted/cdl/ldl/... */ - unsigned int features; /* dasd features like 'ro',... */ - unsigned int reserved0; /* reserved for further use ,... */ - unsigned int reserved1; /* reserved for further use ,... */ - unsigned int reserved2; /* reserved for further use ,... */ - unsigned int reserved3; /* reserved for further use ,... */ - unsigned int reserved4; /* reserved for further use ,... */ - unsigned int reserved5; /* reserved for further use ,... */ - unsigned int reserved6; /* reserved for further use ,... */ - unsigned int reserved7; /* reserved for further use ,... 
*/ + unsigned int devno; /* S/390 devno */ + unsigned int real_devno; /* for aliases */ + unsigned int schid; /* S/390 subchannel identifier */ + unsigned int cu_type : 16; /* from SenseID */ + unsigned int cu_model : 8; /* from SenseID */ + unsigned int dev_type : 16; /* from SenseID */ + unsigned int dev_model : 8; /* from SenseID */ + unsigned int open_count; + unsigned int req_queue_len; + unsigned int chanq_len; /* length of chanq */ + char type[4]; /* from discipline.name, 'none' for unknown */ + unsigned int status; /* current device level */ + unsigned int label_block; /* where to find the VOLSER */ + unsigned int FBA_layout; /* fixed block size (like AIXVOL) */ + unsigned int characteristics_size; + unsigned int confdata_size; + char characteristics[64]; /* from read_device_characteristics */ + char configuration_data[256]; /* from read_configuration_data */ + unsigned int format; /* format info like formatted/cdl/ldl/... */ + unsigned int features; /* dasd features like 'ro',... */ + unsigned int reserved0; /* reserved for further use ,... */ + unsigned int reserved1; /* reserved for further use ,... */ + unsigned int reserved2; /* reserved for further use ,... */ + unsigned int reserved3; /* reserved for further use ,... */ + unsigned int reserved4; /* reserved for further use ,... */ + unsigned int reserved5; /* reserved for further use ,... */ + unsigned int reserved6; /* reserved for further use ,... */ + unsigned int reserved7; /* reserved for further use ,... */ } dasd_information2_t; /* @@ -92,34 +92,34 @@ typedef struct dasd_information2_t { #define DASD_PARTN_BITS 2 -/* +/* * struct dasd_information_t * represents any data about the data, which is visible to userspace */ typedef struct dasd_information_t { - unsigned int devno; /* S/390 devno */ - unsigned int real_devno; /* for aliases */ - unsigned int schid; /* S/390 subchannel identifier */ - unsigned int cu_type : 16; /* from SenseID */ - unsigned int cu_model : 8; /* from SenseID */ - unsigned int dev_type : 16; /* from SenseID */ - unsigned int dev_model : 8; /* from SenseID */ - unsigned int open_count; - unsigned int req_queue_len; - unsigned int chanq_len; /* length of chanq */ - char type[4]; /* from discipline.name, 'none' for unknown */ - unsigned int status; /* current device level */ - unsigned int label_block; /* where to find the VOLSER */ - unsigned int FBA_layout; /* fixed block size (like AIXVOL) */ - unsigned int characteristics_size; - unsigned int confdata_size; - char characteristics[64]; /* from read_device_characteristics */ - char configuration_data[256]; /* from read_configuration_data */ + unsigned int devno; /* S/390 devno */ + unsigned int real_devno; /* for aliases */ + unsigned int schid; /* S/390 subchannel identifier */ + unsigned int cu_type : 16; /* from SenseID */ + unsigned int cu_model : 8; /* from SenseID */ + unsigned int dev_type : 16; /* from SenseID */ + unsigned int dev_model : 8; /* from SenseID */ + unsigned int open_count; + unsigned int req_queue_len; + unsigned int chanq_len; /* length of chanq */ + char type[4]; /* from discipline.name, 'none' for unknown */ + unsigned int status; /* current device level */ + unsigned int label_block; /* where to find the VOLSER */ + unsigned int FBA_layout; /* fixed block size (like AIXVOL) */ + unsigned int characteristics_size; + unsigned int confdata_size; + char characteristics[64]; /* from read_device_characteristics */ + char configuration_data[256]; /* from read_configuration_data */ } dasd_information_t; /* * Read Subsystem 
Data - Performance Statistics - */ + */ typedef struct dasd_rssd_perf_stats_t { unsigned char invalid:1; unsigned char format:3; @@ -154,21 +154,21 @@ typedef struct dasd_rssd_perf_stats_t { unsigned char reseved2[96]; } __attribute__((packed)) dasd_rssd_perf_stats_t; -/* +/* * struct profile_info_t - * holds the profinling information + * holds the profinling information */ typedef struct dasd_profile_info_t { - unsigned int dasd_io_reqs; /* number of requests processed at all */ - unsigned int dasd_io_sects; /* number of sectors processed at all */ - unsigned int dasd_io_secs[32]; /* histogram of request's sizes */ - unsigned int dasd_io_times[32]; /* histogram of requests's times */ - unsigned int dasd_io_timps[32]; /* histogram of requests's times per sector */ - unsigned int dasd_io_time1[32]; /* histogram of time from build to start */ - unsigned int dasd_io_time2[32]; /* histogram of time from start to irq */ - unsigned int dasd_io_time2ps[32]; /* histogram of time from start to irq */ - unsigned int dasd_io_time3[32]; /* histogram of time from irq to end */ - unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */ + unsigned int dasd_io_reqs; /* number of requests processed at all */ + unsigned int dasd_io_sects; /* number of sectors processed at all */ + unsigned int dasd_io_secs[32]; /* histogram of request's sizes */ + unsigned int dasd_io_times[32]; /* histogram of requests's times */ + unsigned int dasd_io_timps[32]; /* histogram of requests's times per sector */ + unsigned int dasd_io_time1[32]; /* histogram of time from build to start */ + unsigned int dasd_io_time2[32]; /* histogram of time from start to irq */ + unsigned int dasd_io_time2ps[32]; /* histogram of time from start to irq */ + unsigned int dasd_io_time3[32]; /* histogram of time from irq to end */ + unsigned int dasd_io_nr_req[32]; /* histogram of # of requests in chanq */ } dasd_profile_info_t; /* @@ -189,10 +189,12 @@ typedef struct format_data_t { * 3/11: also write home address * 4/12: invalidate track */ -#define DASD_FMT_INT_FMT_R0 1 /* write record zero */ -#define DASD_FMT_INT_FMT_HA 2 /* write home address, also set FMT_R0 ! */ -#define DASD_FMT_INT_INVAL 4 /* invalidate tracks */ -#define DASD_FMT_INT_COMPAT 8 /* use OS/390 compatible disk layout */ +#define DASD_FMT_INT_FMT_R0 1 /* write record zero */ +#define DASD_FMT_INT_FMT_HA 2 /* write home address, also set FMT_R0 ! */ +#define DASD_FMT_INT_INVAL 4 /* invalidate tracks */ +#define DASD_FMT_INT_COMPAT 8 /* use OS/390 compatible disk layout */ +#define DASD_FMT_INT_FMT_NOR0 16 /* remove permission to write record zero */ +#define DASD_FMT_INT_ESE_FULL 32 /* release space for entire volume */ /* * struct format_check_t @@ -225,7 +227,7 @@ typedef struct format_check_t { /* If key-length was != 0 */ #define DASD_FMT_ERR_KEY_LENGTH 5 -/* +/* * struct attrib_data_t * represents the operation (cache) bits for the device. * Used in DE to influence caching of the DASD. 
@@ -281,13 +283,13 @@ struct dasd_snid_ioctl_data { * Here ist how the ioctl-nr should be used: * 0 - 31 DASD driver itself * 32 - 239 still open - * 240 - 255 reserved for EMC + * 240 - 255 reserved for EMC *******************************************************************************/ /* Disable the volume (for Linux) */ -#define BIODASDDISABLE _IO(DASD_IOCTL_LETTER,0) +#define BIODASDDISABLE _IO(DASD_IOCTL_LETTER,0) /* Enable the volume (for Linux) */ -#define BIODASDENABLE _IO(DASD_IOCTL_LETTER,1) +#define BIODASDENABLE _IO(DASD_IOCTL_LETTER,1) /* Issue a reserve/release command, rsp. */ #define BIODASDRSRV _IO(DASD_IOCTL_LETTER,2) /* reserve */ #define BIODASDRLSE _IO(DASD_IOCTL_LETTER,3) /* release */ @@ -295,9 +297,9 @@ struct dasd_snid_ioctl_data { /* reset profiling information of a device */ #define BIODASDPRRST _IO(DASD_IOCTL_LETTER,5) /* Quiesce IO on device */ -#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) +#define BIODASDQUIESCE _IO(DASD_IOCTL_LETTER,6) /* Resume IO on device */ -#define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7) +#define BIODASDRESUME _IO(DASD_IOCTL_LETTER,7) /* Abort all I/O on a device */ #define BIODASDABORTIO _IO(DASD_IOCTL_LETTER, 240) /* Allow I/O on a device */ @@ -315,13 +317,15 @@ struct dasd_snid_ioctl_data { /* Performance Statistics Read */ #define BIODASDPSRD _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t) /* Get Attributes (cache operations) */ -#define BIODASDGATTR _IOR(DASD_IOCTL_LETTER,5,attrib_data_t) +#define BIODASDGATTR _IOR(DASD_IOCTL_LETTER,5,attrib_data_t) /* #define BIODASDFORMAT _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */ -#define BIODASDFMT _IOW(DASD_IOCTL_LETTER,1,format_data_t) +#define BIODASDFMT _IOW(DASD_IOCTL_LETTER,1,format_data_t) /* Set Attributes (cache operations) */ -#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t) +#define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t) +/* Release Allocated Space */ +#define BIODASDRAS _IOW(DASD_IOCTL_LETTER, 3, format_data_t) /* Get Sense Path Group ID (SNID) data */ #define BIODASDSNID _IOWR(DASD_IOCTL_LETTER, 1, struct dasd_snid_ioctl_data) diff --git a/arch/s390/include/uapi/asm/runtime_instr.h b/arch/s390/include/uapi/asm/runtime_instr.h index 45c9ec984e6b..455da46e3193 100644 --- a/arch/s390/include/uapi/asm/runtime_instr.h +++ b/arch/s390/include/uapi/asm/runtime_instr.h @@ -57,7 +57,7 @@ struct runtime_instr_cb { __u64 sf; __u64 rsic; __u64 reserved8; -} __packed __aligned(8); +} __attribute__((__packed__, __aligned__(8))); static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb) { diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index b0478d01a0c5..0f255b54b051 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -53,6 +53,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o +obj-y += smp.o extra-y += head64.o vmlinux.lds @@ -60,7 +61,6 @@ obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o obj-$(CONFIG_AUDIT) += audit.o diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 6f2a193ccccc..38d4bdbc34b9 100644 --- a/arch/s390/kernel/compat_signal.c +++ 
b/arch/s390/kernel/compat_signal.c @@ -194,7 +194,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -217,7 +217,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 0ebf08c3b35e..6d321f5f101d 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -647,11 +647,23 @@ static int debug_close(struct inode *inode, struct file *file) return 0; /* success */ } -/* - * debug_register_mode: - * - Creates and initializes debug area for the caller - * The mode parameter allows to specify access rights for the s390dbf files - * - Returns handle for debug area +/** + * debug_register_mode() - creates and initializes debug area. + * + * @name: Name of debug log (e.g. used for debugfs entry) + * @pages_per_area: Number of pages, which will be allocated per area + * @nr_areas: Number of debug areas + * @buf_size: Size of data area in each debug entry + * @mode: File mode for debugfs files. E.g. S_IRWXUGO + * @uid: User ID for debugfs files. Currently only 0 is supported. + * @gid: Group ID for debugfs files. Currently only 0 is supported. + * + * Return: + * - Handle for generated debug area + * - %NULL if register failed + * + * Allocates memory for a debug log. + * Must not be called within an interrupt handler. */ debug_info_t *debug_register_mode(const char *name, int pages_per_area, int nr_areas, int buf_size, umode_t mode, @@ -681,10 +693,21 @@ out: } EXPORT_SYMBOL(debug_register_mode); -/* - * debug_register: - * - creates and initializes debug area for the caller - * - returns handle for debug area +/** + * debug_register() - creates and initializes debug area with default file mode. + * + * @name: Name of debug log (e.g. used for debugfs entry) + * @pages_per_area: Number of pages, which will be allocated per area + * @nr_areas: Number of debug areas + * @buf_size: Size of data area in each debug entry + * + * Return: + * - Handle for generated debug area + * - %NULL if register failed + * + * Allocates memory for a debug log. + * The debugfs file mode access permissions are read and write for user. + * Must not be called within an interrupt handler. */ debug_info_t *debug_register(const char *name, int pages_per_area, int nr_areas, int buf_size) @@ -694,9 +717,13 @@ debug_info_t *debug_register(const char *name, int pages_per_area, } EXPORT_SYMBOL(debug_register); -/* - * debug_unregister: - * - give back debug area +/** + * debug_unregister() - give back debug area. + * + * @id: handle for debug log + * + * Return: + * none */ void debug_unregister(debug_info_t *id) { @@ -745,9 +772,14 @@ out: return rc; } -/* - * debug_set_level: - * - set actual debug level +/** + * debug_set_level() - Sets new actual debug level if new_level is valid. + * + * @id: handle for debug log + * @new_level: new debug level + * + * Return: + * none */ void debug_set_level(debug_info_t *id, int new_level) { @@ -873,6 +905,14 @@ static struct ctl_table s390dbf_dir_table[] = { static struct ctl_table_header *s390dbf_sysctl_header; +/** + * debug_stop_all() - stops the debug feature if stopping is allowed. + * + * Return: + * - none + * + * Currently used in case of a kernel oops. 
+ */ void debug_stop_all(void) { if (debug_stoppable) @@ -880,6 +920,17 @@ void debug_stop_all(void) } EXPORT_SYMBOL(debug_stop_all); +/** + * debug_set_critical() - event/exception functions try lock instead of spin. + * + * Return: + * - none + * + * Currently used in case of stopping all CPUs but the current one. + * Once in this state, functions to write a debug entry for an + * event or exception no longer spin on the debug area lock, + * but only try to get it and fail if they do not get the lock. + */ void debug_set_critical(void) { debug_critical = 1; @@ -1036,8 +1087,16 @@ debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *stri } EXPORT_SYMBOL(__debug_sprintf_exception); -/* - * debug_register_view: +/** + * debug_register_view() - registers new debug view and creates debugfs + * dir entry + * + * @id: handle for debug log + * @view: pointer to debug view struct + * + * Return: + * - 0 : ok + * - < 0: Error */ int debug_register_view(debug_info_t *id, struct debug_view *view) { @@ -1077,8 +1136,16 @@ out: } EXPORT_SYMBOL(debug_register_view); -/* - * debug_unregister_view: +/** + * debug_unregister_view() - unregisters debug view and removes debugfs + * dir entry + * + * @id: handle for debug log + * @view: pointer to debug view struct + * + * Return: + * - 0 : ok + * - < 0: Error */ int debug_unregister_view(debug_info_t *id, struct debug_view *view) { diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index b2c68fbf2634..7abe6ae261b4 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -242,6 +242,7 @@ static const unsigned char formats[][6] = { [INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 }, [INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 }, [INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 }, + [INSTR_RRF_URR] = { R_24, R_28, U8_16, 0, 0, 0 }, [INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 }, [INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 }, [INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 }, @@ -306,7 +307,7 @@ static const unsigned char formats[][6] = { [INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 }, [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 }, [INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 }, - [INSTR_VRR_RV0U] = { R_8, V_12, U4_24, 0, 0, 0 }, + [INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 }, [INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 }, [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 }, [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 }, @@ -326,10 +327,8 @@ static const unsigned char formats[][6] = { [INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 }, [INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 }, [INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 }, - [INSTR_VRS_VVRD] = { V_8, V_12, D_20, B_16, 0, 0 }, [INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 }, [INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 }, - [INSTR_VRX_VRRD] = { V_8, D_20, X_12, B_16, 0, 0 }, [INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 }, [INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 }, [INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 }, diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 9e87b68be21c..ac06c3949ab3 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -199,9 +199,7 @@ void die(struct pt_regs *regs, const char *str) #ifdef CONFIG_PREEMPT pr_cont("PREEMPT "); #endif -#ifdef CONFIG_SMP pr_cont("SMP "); -#endif if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); pr_cont("\n"); diff --git a/arch/s390/kernel/early.c 
b/arch/s390/kernel/early.c index 629f173f60cd..6312fed48530 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -30,7 +30,6 @@ #include <asm/sclp.h> #include <asm/facility.h> #include <asm/boot_data.h> -#include <asm/pci_insn.h> #include "entry.h" /* @@ -236,7 +235,6 @@ static __init void detect_machine_facilities(void) clock_comparator_max = -1ULL >> 1; __ctl_set_bit(0, 53); } - enable_mio_ctl(); } static inline void save_vector_registers(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3f4d272577d3..270d1d145761 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -986,14 +986,12 @@ ENTRY(psw_idle) stg %r3,__SF_EMPTY(%r15) larl %r1,.Lpsw_idle_lpsw+4 stg %r1,__SF_EMPTY+8(%r15) -#ifdef CONFIG_SMP larl %r1,smp_cpu_mtid llgf %r1,0(%r1) ltgr %r1,%r1 jz .Lpsw_idle_stcctm .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15) .Lpsw_idle_stcctm: -#endif oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT BPON STCK __CLOCK_IDLE_ENTER(%r2) @@ -1468,7 +1466,6 @@ ENDPROC(cleanup_critical) mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2) mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2) 1: # calculate idle cycles -#ifdef CONFIG_SMP clg %r9,BASED(.Lcleanup_idle_insn) jl 3f larl %r1,smp_cpu_mtid @@ -1486,7 +1483,6 @@ ENDPROC(cleanup_critical) la %r3,8(%r3) la %r4,8(%r4) brct %r1,2b -#endif 3: # account system time going idle lg %r9,__LC_STEAL_TIMER alg %r9,__CLOCK_IDLE_ENTER(%r2) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 20420c2b8a14..b2956d49b6ad 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -63,7 +63,6 @@ void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); void __init time_init(void); -int pfn_is_nosave(unsigned long); void s390_early_resume(void); unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index d836af3ccc38..2c0a515428d6 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -286,12 +286,7 @@ static struct kobj_attribute sys_ipl_secure_attr = static ssize_t ipl_has_secure_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - if (MACHINE_IS_LPAR) - return sprintf(page, "%i\n", !!sclp.has_sipl); - else if (MACHINE_IS_VM) - return sprintf(page, "%i\n", !!sclp.has_sipl_g2); - else - return sprintf(page, "%i\n", 0); + return sprintf(page, "%i\n", !!sclp.has_sipl); } static struct kobj_attribute sys_ipl_has_secure_attr = diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index 3f10b56bd5a3..ab584e8e3527 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -15,16 +15,11 @@ struct insn { s32 offset; } __packed; -struct insn_args { - struct jump_entry *entry; - enum jump_label_type type; -}; - static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn) { - /* brcl 0,0 */ + /* brcl 0,offset */ insn->opcode = 0xc004; - insn->offset = 0; + insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1; } static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn) @@ -77,23 +72,15 @@ static void __jump_label_transform(struct jump_entry *entry, s390_kernel_write(code, &new, sizeof(new)); } -static int __sm_arch_jump_label_transform(void *data) +static void __jump_label_sync(void *dummy) { - struct insn_args *args = data; - - __jump_label_transform(args->entry, args->type, 0); - return 
0; } void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - struct insn_args args; - - args.entry = entry; - args.type = type; - - stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL); + __jump_label_transform(entry, type, 0); + smp_call_function(__jump_label_sync, NULL, 1); } void arch_jump_label_transform_static(struct jump_entry *entry, diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 8a1ae140c5e2..444a19125a81 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -141,7 +141,6 @@ static noinline void __machine_kdump(void *image) */ store_status(__do_machine_kdump, image); } -#endif static unsigned long do_start_kdump(unsigned long addr) { @@ -155,6 +154,8 @@ static unsigned long do_start_kdump(unsigned long addr) return rc; } +#endif /* CONFIG_CRASH_DUMP */ + /* * Check if kdump checksums are valid: We call purgatory with parameter "0" */ diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c index 34cc96449b30..8b33e03e47b8 100644 --- a/arch/s390/kernel/perf_cpum_cf_events.c +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -624,6 +624,8 @@ __init const struct attribute_group **cpumf_cf_event_group(void) break; case 0x3906: case 0x3907: + case 0x8561: + case 0x8562: model = cpumcf_z14_pmu_event_attr; break; default: diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5de13307b703..6ebc2117c66c 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -7,6 +7,7 @@ #define KMSG_COMPONENT "cpu" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include <linux/stop_machine.h> #include <linux/cpufeature.h> #include <linux/bitops.h> #include <linux/kernel.h> @@ -31,6 +32,7 @@ struct cpu_info { }; static DEFINE_PER_CPU(struct cpu_info, cpu_info); +static DEFINE_PER_CPU(int, cpu_relax_retry); static bool machine_has_cpu_mhz; @@ -58,15 +60,20 @@ void s390_update_cpu_mhz(void) on_each_cpu(update_cpu_mhz, NULL, 0); } -void notrace cpu_relax_yield(void) +void notrace stop_machine_yield(const struct cpumask *cpumask) { - if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) { - diag_stat_inc(DIAG_STAT_X044); - asm volatile("diag 0,0,0x44"); + int cpu, this_cpu; + + this_cpu = smp_processor_id(); + if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { + __this_cpu_write(cpu_relax_retry, 0); + cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + if (cpu >= nr_cpu_ids) + return; + if (arch_vcpu_is_preempted(cpu)) + smp_yield_cpu(cpu); } - barrier(); } -EXPORT_SYMBOL(cpu_relax_yield); /* * cpu_init - initializes state that is per-CPU. 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index f8544d517430..2b94b0ad3588 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -461,11 +461,9 @@ static void __init setup_lowcore_dat_off(void) mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source); mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw); -#ifdef CONFIG_SMP lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; arch_spin_lock_setup(0); -#endif lc->br_r1_trampoline = 0x07f1; /* br %r1 */ set_prefix((u32)(unsigned long) lc); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 22f08245aa5d..e6fca5498e1f 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -232,7 +232,7 @@ SYSCALL_DEFINE0(sigreturn) load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -256,7 +256,7 @@ SYSCALL_DEFINE0(rt_sigreturn) load_sigregs(); return regs->gprs[2]; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 35fafa2b91a8..44974654cbd0 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -232,8 +232,6 @@ out: return -ENOMEM; } -#ifdef CONFIG_HOTPLUG_CPU - static void pcpu_free_lowcore(struct pcpu *pcpu) { unsigned long async_stack, nodat_stack, lowcore; @@ -253,8 +251,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) free_pages(lowcore, LC_ORDER); } -#endif /* CONFIG_HOTPLUG_CPU */ - static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { struct lowcore *lc = pcpu->lowcore; @@ -418,7 +414,7 @@ void smp_yield_cpu(int cpu) diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" : : "d" (pcpu_devices[cpu].address)); - } else if (MACHINE_HAS_DIAG44) { + } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) { diag_stat_inc_norecursion(DIAG_STAT_X044); asm volatile("diag 0,0,0x44"); } @@ -895,8 +891,6 @@ static int __init _setup_possible_cpus(char *s) } early_param("possible_cpus", _setup_possible_cpus); -#ifdef CONFIG_HOTPLUG_CPU - int __cpu_disable(void) { unsigned long cregs[16]; @@ -937,8 +931,6 @@ void __noreturn cpu_die(void) for (;;) ; } -#endif /* CONFIG_HOTPLUG_CPU */ - void __init smp_fill_possible_mask(void) { unsigned int possible, sclp_max, cpu; @@ -996,7 +988,6 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } -#ifdef CONFIG_HOTPLUG_CPU static ssize_t cpu_configure_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1073,7 +1064,6 @@ out: return rc ? rc : count; } static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); -#endif /* CONFIG_HOTPLUG_CPU */ static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) @@ -1083,9 +1073,7 @@ static ssize_t show_cpu_address(struct device *dev, static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); static struct attribute *cpu_common_attrs[] = { -#ifdef CONFIG_HOTPLUG_CPU &dev_attr_configure.attr, -#endif &dev_attr_address.attr, NULL, }; @@ -1144,15 +1132,11 @@ static int smp_add_present_cpu(int cpu) out_topology: sysfs_remove_group(&s->kobj, &cpu_common_attr_group); out_cpu: -#ifdef CONFIG_HOTPLUG_CPU unregister_cpu(c); -#endif out: return rc; } -#ifdef CONFIG_HOTPLUG_CPU - int __ref smp_rescan_cpus(void) { struct sclp_core_info *info; @@ -1188,17 +1172,14 @@ static ssize_t __ref rescan_store(struct device *dev, return rc ? 
rc : count; } static DEVICE_ATTR_WO(rescan); -#endif /* CONFIG_HOTPLUG_CPU */ static int __init s390_smp_init(void) { int cpu, rc = 0; -#ifdef CONFIG_HOTPLUG_CPU rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan); if (rc) return rc; -#endif for_each_present_cpu(cpu) { rc = smp_add_present_cpu(cpu); if (rc) diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index 19a3c427801a..a7baf0b5f818 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -162,7 +162,6 @@ ENTRY(swsusp_arch_resume) larl %r1,__swsusp_reset_dma lg %r1,0(%r1) BASR_EX %r14,%r1 -#ifdef CONFIG_SMP larl %r1,smp_cpu_mt_shift icm %r1,15,0(%r1) jz smt_done @@ -172,7 +171,6 @@ smt_loop: brc 8,smt_done /* accepted */ brc 2,smt_loop /* busy, try again */ smt_done: -#endif larl %r1,.Lnew_pgm_check_psw lpswe 0(%r1) pgm_check_entry: diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e822b2964a83..6ebacfeaf853 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -436,3 +436,4 @@ 431 common fsconfig sys_fsconfig sys_fsconfig 432 common fsmount sys_fsmount sys_fsmount 433 common fspick sys_fspick sys_fspick +434 common pidfd_open sys_pidfd_open sys_pidfd_open diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 82e81a9f7112..164c0282b41a 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -45,7 +45,7 @@ int is_valid_bugaddr(unsigned long addr) void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { if (user_mode(regs)) { - force_sig_fault(si_signo, si_code, get_trap_ip(regs), current); + force_sig_fault(si_signo, si_code, get_trap_ip(regs)); report_user_fault(regs, si_signo, 0); } else { const struct exception_table_entry *fixup; @@ -79,7 +79,7 @@ void do_per_trap(struct pt_regs *regs) if (!current->ptrace) return; force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __force __user *) current->thread.per_event.address, current); + (void __force __user *) current->thread.per_event.address); } NOKPROBE_SYMBOL(do_per_trap); @@ -165,7 +165,7 @@ void illegal_op(struct pt_regs *regs) return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { if (current->ptrace) - force_sig_fault(SIGTRAP, TRAP_BRKPT, location, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, location); else signal = SIGILL; #ifdef CONFIG_UPROBES @@ -229,17 +229,11 @@ void vector_exception(struct pt_regs *regs) void data_exception(struct pt_regs *regs) { - int signal = 0; - save_fpu_regs(); if (current->thread.fpu.fpc & FPC_DXC_MASK) - signal = SIGFPE; - else - signal = SIGILL; - if (signal == SIGFPE) do_fp_trap(regs, current->thread.fpu.fpc); - else if (signal) - do_trap(regs, signal, ILL_ILLOPN, "data exception"); + else + do_trap(regs, SIGILL, ILL_ILLOPN, "data exception"); } void space_switch_exception(struct pt_regs *regs) diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index 57fd4e902f1f..8fc9daae47a2 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -20,7 +20,7 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address); static bool outside_of_stack(struct unwind_state *state, unsigned long sp) { return (sp <= state->sp) || - (sp + sizeof(struct stack_frame) > state->stack_info.end); + (sp > state->stack_info.end - sizeof(struct stack_frame)); } static bool update_stack_info(struct unwind_state *state, unsigned long sp) @@ -46,18 +46,18 @@ bool unwind_next_frame(struct unwind_state *state) regs = state->regs; if (unlikely(regs)) { - sp = 
READ_ONCE_TASK_STACK(state->task, regs->gprs[15]); + sp = READ_ONCE_NOCHECK(regs->gprs[15]); if (unlikely(outside_of_stack(state, sp))) { if (!update_stack_info(state, sp)) goto out_err; } sf = (struct stack_frame *) sp; - ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; regs = NULL; } else { sf = (struct stack_frame *) state->sp; - sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain); + sp = READ_ONCE_NOCHECK(sf->back_chain); if (likely(sp)) { /* Non-zero back-chain points to the previous frame */ if (unlikely(outside_of_stack(state, sp))) { @@ -65,7 +65,7 @@ bool unwind_next_frame(struct unwind_state *state) goto out_err; } sf = (struct stack_frame *) sp; - ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = true; } else { /* No back-chain, look for a pt_regs structure */ @@ -73,9 +73,9 @@ bool unwind_next_frame(struct unwind_state *state) if (!on_stack(info, sp, sizeof(struct pt_regs))) goto out_stop; regs = (struct pt_regs *) sp; - if (user_mode(regs)) + if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE) goto out_stop; - ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); + ip = READ_ONCE_NOCHECK(regs->psw.addr); reliable = true; } } @@ -132,11 +132,11 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, /* Get the instruction pointer from pt_regs or the stack frame */ if (regs) { - ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr); + ip = READ_ONCE_NOCHECK(regs->psw.addr); reliable = true; } else { sf = (struct stack_frame *) sp; - ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]); + ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 28ebd647784c..3f520cd837fb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -227,6 +227,11 @@ int kvm_arch_hardware_enable(void) return 0; } +int kvm_arch_check_processor_compat(void) +{ + return 0; +} + static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, unsigned long end); @@ -2418,13 +2423,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); sca_offset += 16; if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; kvm->arch.sca = (struct bsca_block *) ((char *) kvm->arch.sca + sca_offset); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -2461,6 +2466,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_kvm_facility(kvm->arch.model.fac_list, 147); } + if (css_general_characteristics.aiv && test_facility(65)) + set_kvm_facility(kvm->arch.model.fac_mask, 65); + kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 8679bd74d337..ed52ffa8d5d4 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -27,6 +27,7 @@ #include <asm/io.h> #include <asm/ptrace.h> #include <asm/sclp.h> +#include <asm/ap.h> #include "gaccess.h" #include "kvm-s390.h" #include "trace.h" @@ -592,6 +593,89 @@ static int handle_io_inst(struct kvm_vcpu *vcpu) } } +/* + * handle_pqap: Handling pqap interception + * @vcpu: the vcpu having issue the pqap instruction + * + * We now support PQAP/AQIC instructions and we need to correctly + * answer the guest even if no 
dedicated driver's hook is available. + * + * The intercepting code calls a dedicated callback for this instruction + * if a driver did register one in the CRYPTO satellite of the + * SIE block. + * + * If no callback is available, the queues are not available, return this + * response code to the caller and set CC to 3. + * Else return the response code returned by the callback. + */ +static int handle_pqap(struct kvm_vcpu *vcpu) +{ + struct ap_queue_status status = {}; + unsigned long reg0; + int ret; + uint8_t fc; + + /* Verify that the AP instruction are available */ + if (!ap_instructions_available()) + return -EOPNOTSUPP; + /* Verify that the guest is allowed to use AP instructions */ + if (!(vcpu->arch.sie_block->eca & ECA_APIE)) + return -EOPNOTSUPP; + /* + * The only possibly intercepted functions when AP instructions are + * available for the guest are AQIC and TAPQ with the t bit set + * since we do not set IC.3 (FIII) we currently will only intercept + * the AQIC function code. + */ + reg0 = vcpu->run->s.regs.gprs[0]; + fc = (reg0 >> 24) & 0xff; + if (WARN_ON_ONCE(fc != 0x03)) + return -EOPNOTSUPP; + + /* PQAP instruction is allowed for guest kernel only */ + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + /* Common PQAP instruction specification exceptions */ + /* bits 41-47 must all be zeros */ + if (reg0 & 0x007f0000UL) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* APFT not install and T bit set */ + if (!test_kvm_facility(vcpu->kvm, 15) && (reg0 & 0x00800000UL)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* APXA not installed and APID greater 64 or APQI greater 16 */ + if (!(vcpu->kvm->arch.crypto.crycbd & 0x02) && (reg0 & 0x0000c0f0UL)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* AQIC function code specific exception */ + /* facility 65 not present for AQIC function code */ + if (!test_kvm_facility(vcpu->kvm, 65)) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* + * Verify that the hook callback is registered, lock the owner + * and call the hook. + */ + if (vcpu->kvm->arch.crypto.pqap_hook) { + if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner)) + return -EOPNOTSUPP; + ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu); + module_put(vcpu->kvm->arch.crypto.pqap_hook->owner); + if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000) + kvm_s390_set_psw_cc(vcpu, 3); + return ret; + } + /* + * A vfio_driver must register a hook. + * No hook means no driver to enable the SIE CRYCB and no queues. + * We send this response to the guest. + */ + status.response_code = 0x01; + memcpy(&vcpu->run->s.regs.gprs[1], &status, sizeof(status)); + kvm_s390_set_psw_cc(vcpu, 3); + return 0; +} + static int handle_stfl(struct kvm_vcpu *vcpu) { int rc; @@ -878,6 +962,8 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu) return handle_sthyi(vcpu); case 0x7d: return handle_stsi(vcpu); + case 0xaf: + return handle_pqap(vcpu); case 0xb1: return handle_stfl(vcpu); case 0xb2: diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 5418d10dc2a8..a1ec63abfb95 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,9 +3,8 @@ # Makefile for s390-specific library files.. 
# -lib-y += delay.o string.o uaccess.o find.o +lib-y += delay.o string.o uaccess.o find.o spinlock.o obj-y += mem.o xor.o -lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index df75d574246d..0ba174f779da 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -248,8 +248,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { report_user_fault(regs, SIGSEGV, 1); force_sig_fault(SIGSEGV, si_code, - (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK), - current); + (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } const struct exception_table_entry *s390_search_extables(unsigned long addr) @@ -310,8 +309,7 @@ static noinline void do_sigbus(struct pt_regs *regs) * or user mode. */ force_sig_fault(SIGBUS, BUS_ADRERR, - (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK), - current); + (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK)); } static noinline int signal_return(struct pt_regs *regs) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 14d1eae9fe43..f0bee6af3960 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -18,6 +18,7 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/swap.h> +#include <linux/swiotlb.h> #include <linux/smp.h> #include <linux/init.h> #include <linux/pagemap.h> @@ -29,6 +30,7 @@ #include <linux/export.h> #include <linux/cma.h> #include <linux/gfp.h> +#include <linux/dma-mapping.h> #include <asm/processor.h> #include <linux/uaccess.h> #include <asm/pgtable.h> @@ -42,6 +44,8 @@ #include <asm/sclp.h> #include <asm/set_memory.h> #include <asm/kasan.h> +#include <asm/dma-mapping.h> +#include <asm/uv.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir); @@ -128,6 +132,47 @@ void mark_rodata_ro(void) pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } +int set_memory_encrypted(unsigned long addr, int numpages) +{ + int i; + + /* make specified pages unshared, (swiotlb, dma_free) */ + for (i = 0; i < numpages; ++i) { + uv_remove_shared(addr); + addr += PAGE_SIZE; + } + return 0; +} + +int set_memory_decrypted(unsigned long addr, int numpages) +{ + int i; + /* make specified pages shared (swiotlb, dma_alloca) */ + for (i = 0; i < numpages; ++i) { + uv_set_shared(addr); + addr += PAGE_SIZE; + } + return 0; +} + +/* are we a protected virtualization guest? */ +bool sev_active(void) +{ + return is_prot_virt_guest(); +} + +/* protected virtualization */ +static void pv_init(void) +{ + if (!is_prot_virt_guest()) + return; + + /* make sure bounce buffers are shared */ + swiotlb_init(1); + swiotlb_update_mem_attributes(); + swiotlb_force = SWIOTLB_FORCE; +} + void __init mem_init(void) { cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); @@ -136,6 +181,8 @@ void __init mem_init(void) set_max_mapnr(max_low_pfn); high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + pv_init(); + /* Setup guest page hinting */ cmma_init(); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 818deeb1ebc3..1864a8bb9622 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -52,21 +52,22 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz * Therefore we have a read-modify-write sequence: the function reads eight * bytes from destination at an eight byte boundary, modifies the bytes * requested and writes the result back in a loop. 
- * - * Note: this means that this function may not be called concurrently on - * several cpus with overlapping words, since this may potentially - * cause data corruption. */ +static DEFINE_SPINLOCK(s390_kernel_write_lock); + void notrace s390_kernel_write(void *dst, const void *src, size_t size) { + unsigned long flags; long copied; + spin_lock_irqsave(&s390_kernel_write_lock, flags); while (size) { copied = s390_kernel_write_odd(dst, src, size); dst += copied; src += copied; size -= copied; } + spin_unlock_irqrestore(&s390_kernel_write_lock, flags); } static int __memcpy_real(void *dest, void *src, size_t count) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 687f2a4d3459..cbc718ba6d78 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -24,8 +24,6 @@ static unsigned long stack_maxrandom_size(void) { if (!(current->flags & PF_RANDOMIZE)) return 0; - if (current->personality & ADDR_NO_RANDOMIZE) - return 0; return STACK_RND_MASK << PAGE_SHIFT; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 5e7c63033159..e636728ab452 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -299,9 +299,11 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) #define EMIT_ZERO(b1) \ ({ \ - /* llgfr %dst,%dst (zero extend to 64 bit) */ \ - EMIT4(0xb9160000, b1, b1); \ - REG_SET_SEEN(b1); \ + if (!fp->aux->verifier_zext) { \ + /* llgfr %dst,%dst (zero extend to 64 bit) */ \ + EMIT4(0xb9160000, b1, b1); \ + REG_SET_SEEN(b1); \ + } \ }) /* @@ -520,6 +522,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */ /* llgfr %dst,%src */ EMIT4(0xb9160000, dst_reg, src_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ /* lgr %dst,%src */ @@ -528,6 +532,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */ /* llilf %dst,imm */ EMIT6_IMM(0xc00f0000, dst_reg, imm); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */ /* lgfi %dst,imm */ @@ -639,6 +645,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT4(0xb9970000, REG_W0, src_reg); /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; } case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */ @@ -676,6 +684,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT_CONST_U32(imm)); /* llgfr %dst,%rc */ EMIT4(0xb9160000, dst_reg, rc_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; } case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */ @@ -864,10 +874,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i case 16: /* dst = (u16) cpu_to_be16(dst) */ /* llghr %dst,%dst */ EMIT4(0xb9850000, dst_reg, dst_reg); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case 32: /* dst = (u32) cpu_to_be32(dst) */ - /* llgfr %dst,%dst */ - EMIT4(0xb9160000, dst_reg, dst_reg); + if (!fp->aux->verifier_zext) + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); break; case 64: /* dst = (u64) cpu_to_be64(dst) */ break; @@ -882,12 +895,15 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i EMIT4_DISP(0x88000000, dst_reg, REG_0, 16); /* llghr %dst,%dst */ EMIT4(0xb9850000, dst_reg, dst_reg); + if 
(insn_is_zext(&insn[1])) + insn_count = 2; break; case 32: /* dst = (u32) cpu_to_le32(dst) */ /* lrvr %dst,%dst */ EMIT4(0xb91f0000, dst_reg, dst_reg); - /* llgfr %dst,%dst */ - EMIT4(0xb9160000, dst_reg, dst_reg); + if (!fp->aux->verifier_zext) + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); break; case 64: /* dst = (u64) cpu_to_le64(dst) */ /* lrvgr %dst,%dst */ @@ -968,16 +984,22 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i /* llgc %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ /* llgh %dst,0(off,%src) */ EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off); jit->seen |= SEEN_MEM; + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ /* llgf %dst,off(%src) */ jit->seen |= SEEN_MEM; EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off); + if (insn_is_zext(&insn[1])) + insn_count = 2; break; case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ /* lg %dst,0(off,%src) */ @@ -1282,6 +1304,11 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) return 0; } +bool bpf_jit_needs_zext(void) +{ + return true; +} + /* * Compile eBPF program "fp" */ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 86ca7f88fb22..b0e3b9a0e488 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -421,12 +421,12 @@ static void zpci_map_resources(struct pci_dev *pdev) if (!len) continue; - if (static_branch_likely(&have_mio)) + if (zpci_use_mio(zdev)) pdev->resource[i].start = (resource_size_t __force) zdev->bars[i].mio_wb; else - pdev->resource[i].start = - (resource_size_t __force) pci_iomap(pdev, i, 0); + pdev->resource[i].start = (resource_size_t __force) + pci_iomap_range_fh(pdev, i, 0, 0); pdev->resource[i].end = pdev->resource[i].start + len - 1; } @@ -444,18 +444,19 @@ static void zpci_map_resources(struct pci_dev *pdev) static void zpci_unmap_resources(struct pci_dev *pdev) { + struct zpci_dev *zdev = to_zpci(pdev); resource_size_t len; int i; - if (static_branch_likely(&have_mio)) + if (zpci_use_mio(zdev)) return; for (i = 0; i < PCI_BAR_COUNT; i++) { len = pci_resource_len(pdev, i); if (!len) continue; - pci_iounmap(pdev, (void __iomem __force *) - pdev->resource[i].start); + pci_iounmap_fh(pdev, (void __iomem __force *) + pdev->resource[i].start); } } @@ -528,7 +529,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev, if (zdev->bars[i].val & 4) flags |= IORESOURCE_MEM_64; - if (static_branch_likely(&have_mio)) + if (zpci_use_mio(zdev)) addr = (unsigned long) zdev->bars[i].mio_wb; else addr = ZPCI_ADDR(entry); @@ -889,8 +890,10 @@ static int __init pci_base_init(void) if (!test_facility(69) || !test_facility(71)) return 0; - if (test_facility(153) && !s390_pci_no_mio) + if (test_facility(153) && !s390_pci_no_mio) { static_branch_enable(&have_mio); + ctl_set_bit(2, 5); + } rc = zpci_debug_init(); if (rc) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index d03631dba7c2..9bdff4defef1 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -291,7 +291,7 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as) goto out; zdev->fh = fh; - if (zdev->mio_capable) { + if (zpci_use_mio(zdev)) { rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO); zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", 
zdev->fid, fh, rc); if (rc) diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 6b48ca7760a7..3408c0df3ebf 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -74,7 +74,7 @@ static void pci_sw_counter_show(struct seq_file *m) int i; for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++) - seq_printf(m, "%26s:\t%lu\n", pci_sw_names[i], + seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i], atomic64_read(counter)); } diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 430c14b006d1..a433ba01a317 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -37,6 +37,15 @@ zpci_attr(segment1, "0x%02x\n", pfip[1]); zpci_attr(segment2, "0x%02x\n", pfip[2]); zpci_attr(segment3, "0x%02x\n", pfip[3]); +static ssize_t mio_enabled_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); + + return sprintf(buf, zpci_use_mio(zdev) ? "1\n" : "0\n"); +} +static DEVICE_ATTR_RO(mio_enabled); + static ssize_t recover_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -115,6 +124,7 @@ static struct attribute *zpci_dev_attrs[] = { &dev_attr_vfn.attr, &dev_attr_uid.attr, &dev_attr_recover.attr, + &dev_attr_mio_enabled.attr, NULL, }; static struct attribute_group zpci_attr_group = { diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore index e9e66f178a6d..04a03433c720 100644 --- a/arch/s390/purgatory/.gitignore +++ b/arch/s390/purgatory/.gitignore @@ -1,2 +1,3 @@ -kexec-purgatory.c +purgatory +purgatory.lds purgatory.ro diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile index 2342b84b3386..b5e35e8f999a 100644 --- a/arch/s390/tools/Makefile +++ b/arch/s390/tools/Makefile @@ -6,7 +6,6 @@ kapi := arch/$(ARCH)/include/generated/asm kapi-hdrs-y := $(kapi)/facility-defs.h $(kapi)/dis-defs.h -targets += $(addprefix ../../../,$(kapi-hdrs-y)) PHONY += kapi kapi: $(kapi-hdrs-y) @@ -14,11 +13,7 @@ kapi: $(kapi-hdrs-y) hostprogs-y += gen_facilities hostprogs-y += gen_opcode_table -HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE) -HOSTCFLAGS_gen_opcode_table.o += -Wall $(LINUXINCLUDE) - -# Ensure output directory exists -_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE) filechk_facility-defs.h = $(obj)/gen_facilities diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt index 64638b764d1c..46d8ed96cf06 100644 --- a/arch/s390/tools/opcodes.txt +++ b/arch/s390/tools/opcodes.txt @@ -520,6 +520,9 @@ b92e km RRE_RR b92f kmc RRE_RR b930 cgfr RRE_RR b931 clgfr RRE_RR +b938 sortl RRE_RR +b939 dfltcc RRF_R0RR2 +b93a kdsa RRE_RR b93c ppno RRE_RR b93e kimd RRE_RR b93f klmd RRE_RR @@ -538,8 +541,16 @@ b95a cxlgtr RRF_UUFR b95b cxlftr RRF_UUFR b960 cgrt RRF_U0RR b961 clgrt RRF_U0RR +b964 nngrk RRF_R0RR2 +b965 ocgrk RRF_R0RR2 +b966 nogrk RRF_R0RR2 +b967 nxgrk RRF_R0RR2 b972 crt RRF_U0RR b973 clrt RRF_U0RR +b974 nnrk RRF_R0RR2 +b975 ocrk RRF_R0RR2 +b976 nork RRF_R0RR2 +b977 nxrk RRF_R0RR2 b980 ngr RRE_RR b981 ogr RRE_RR b982 xgr RRE_RR @@ -573,6 +584,7 @@ b99f ssair RRE_R0 b9a0 clp RRF_U0RR b9a1 tpei RRE_RR b9a2 ptf RRE_R0 +b9a4 uvc RRF_URR b9aa lptea RRF_RURR2 b9ab essa RRF_U0RR b9ac irbm RRE_RR @@ -585,6 +597,7 @@ b9b3 cu42 RRE_RR b9bd trtre RRF_U0RR b9be srstu RRE_RR b9bf trte RRF_U0RR +b9c0 selhhhr RRF_RURR b9c8 ahhhr RRF_R0RR2 b9c9 shhhr RRF_R0RR2 b9ca alhhhr RRF_R0RR2 @@ -594,6 +607,9 @@ b9cf clhhr RRE_RR b9d0 pcistg RRE_RR b9d2 pcilg RRE_RR 
b9d3 rpcit RRE_RR +b9d4 pcistgi RRE_RR +b9d5 pciwb RRE_00 +b9d6 pcilgi RRE_RR b9d8 ahhlr RRF_R0RR2 b9d9 shhlr RRF_R0RR2 b9da alhhlr RRF_R0RR2 @@ -601,9 +617,11 @@ b9db slhhlr RRF_R0RR2 b9dd chlr RRE_RR b9df clhlr RRE_RR b9e0 locfhr RRF_U0RR -b9e1 popcnt RRE_RR +b9e1 popcnt RRF_U0RR b9e2 locgr RRF_U0RR +b9e3 selgr RRF_RURR b9e4 ngrk RRF_R0RR2 +b9e5 ncgrk RRF_R0RR2 b9e6 ogrk RRF_R0RR2 b9e7 xgrk RRF_R0RR2 b9e8 agrk RRF_R0RR2 @@ -612,8 +630,10 @@ b9ea algrk RRF_R0RR2 b9eb slgrk RRF_R0RR2 b9ec mgrk RRF_R0RR2 b9ed msgrkc RRF_R0RR2 +b9f0 selr RRF_RURR b9f2 locr RRF_U0RR b9f4 nrk RRF_R0RR2 +b9f5 ncrk RRF_R0RR2 b9f6 ork RRF_R0RR2 b9f7 xrk RRF_R0RR2 b9f8 ark RRF_R0RR2 @@ -822,6 +842,7 @@ e3d4 stpcifc RXY_RRRD e500 lasp SSE_RDRD e501 tprot SSE_RDRD e502 strag SSE_RDRD +e50a mvcrl SSE_RDRD e50e mvcsk SSE_RDRD e50f mvcdk SSE_RDRD e544 mvhhi SIL_RDI @@ -835,6 +856,18 @@ e55c chsi SIL_RDI e55d clfhsi SIL_RDU e560 tbegin SIL_RDU e561 tbeginc SIL_RDU +e601 vlebrh VRX_VRRDU +e602 vlebrg VRX_VRRDU +e603 vlebrf VRX_VRRDU +e604 vllebrz VRX_VRRDU +e605 vlbrrep VRX_VRRDU +e606 vlbr VRX_VRRDU +e607 vler VRX_VRRDU +e609 vstebrh VRX_VRRDU +e60a vstebrg VRX_VRRDU +e60b vstebrf VRX_VRRDU +e60e vstbr VRX_VRRDU +e60f vster VRX_VRRDU e634 vpkz VSI_URDV e635 vlrl VSI_URDV e637 vlrlr VRS_RRDV @@ -842,8 +875,8 @@ e63c vupkz VSI_URDV e63d vstrl VSI_URDV e63f vstrlr VRS_RRDV e649 vlip VRI_V0UU2 -e650 vcvb VRR_RV0U -e652 vcvbg VRR_RV0U +e650 vcvb VRR_RV0UU +e652 vcvbg VRR_RV0UU e658 vcvd VRI_VR0UU e659 vsrp VRI_VVUUU2 e65a vcvdg VRI_VR0UU @@ -863,13 +896,13 @@ e702 vleg VRX_VRRDU e703 vlef VRX_VRRDU e704 vllez VRX_VRRDU e705 vlrep VRX_VRRDU -e706 vl VRX_VRRD +e706 vl VRX_VRRDU e707 vlbb VRX_VRRDU e708 vsteb VRX_VRRDU e709 vsteh VRX_VRRDU e70a vsteg VRX_VRRDU e70b vstef VRX_VRRDU -e70e vst VRX_VRRD +e70e vst VRX_VRRDU e712 vgeg VRV_VVXRDU e713 vgef VRV_VVXRDU e71a vsceg VRV_VVXRDU @@ -879,11 +912,11 @@ e722 vlvg VRS_VRRDU e727 lcbb RXE_RRRDU e730 vesl VRS_VVRDU e733 verll VRS_VVRDU -e736 vlm VRS_VVRD +e736 vlm VRS_VVRDU e737 vll VRS_VRRD e738 vesrl VRS_VVRDU e73a vesra VRS_VVRDU -e73e vstm VRS_VVRD +e73e vstm VRS_VVRDU e73f vstl VRS_VRRD e740 vleib VRI_V0IU e741 vleih VRI_V0IU @@ -932,7 +965,10 @@ e781 vfene VRR_VVV0U0U e782 vfae VRR_VVV0U0U e784 vpdi VRR_VVV0U e785 vbperm VRR_VVV +e786 vsld VRI_VVV0U +e787 vsrd VRI_VVV0U e78a vstrc VRR_VVVUU0V +e78b vstrs VRR_VVVUU0V e78c vperm VRR_VVV0V e78d vsel VRR_VVV0V e78e vfms VRR_VVVU0UV @@ -1060,6 +1096,7 @@ eb9b stamy RSY_AARD ebc0 tp RSL_R0RD ebd0 pcistb RSY_RRRD ebd1 sic RSY_RRRD +ebd4 pcistbi RSY_RRRD ebdc srak RSY_RRRD ebdd slak RSY_RRRD ebde srlk RSY_RRRD diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index b77f512bb176..31a7d12db705 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 config SUPERH def_bool y + select ARCH_HAS_BINFMT_FLAT if !MMU select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_MIGHT_HAVE_PC_PARPORT @@ -14,6 +15,7 @@ config SUPERH select HAVE_ARCH_TRACEHOOK select HAVE_PERF_EVENTS select HAVE_DEBUG_BUGVERBOSE + select HAVE_FAST_GUP if MMU select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_GCOV_PROFILE_ALL @@ -63,6 +65,7 @@ config SUPERH config SUPERH32 def_bool "$(ARCH)" = "sh" select ARCH_32BIT_OFF_T + select GUP_GET_PTE_LOW_HIGH if X2TLB select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_IOREMAP_PROT if MMU && !X2TLB @@ -623,7 +626,7 @@ config CRASH_DUMP to a memory address not used by the main 
kernel using PHYSICAL_START. - For more details see Documentation/kdump/kdump.txt + For more details see Documentation/kdump/kdump.rst config KEXEC_JUMP bool "kexec jump (EXPERIMENTAL)" diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index 4dcf7f552582..91d43e2bffea 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -40,7 +40,6 @@ CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_HIT=y CONFIG_FB_SH_MOBILE_LCDC=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FONTS=y CONFIG_FONT_PEARL_8x8=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 5209889765ad..49a29338789b 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -191,7 +191,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=m CONFIG_JFFS2_FS_XATTR=y CONFIG_UBIFS_FS=m -CONFIG_LOGFS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m CONFIG_ROMFS_FS=m diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index 5a1097641247..1e116529735f 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -63,7 +63,6 @@ CONFIG_NET_SCH_NETEM=y CONFIG_NET_CLS_TCINDEX=y CONFIG_NET_CLS_ROUTE4=y CONFIG_NET_CLS_FW=y -CONFIG_NET_CLS_IND=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index 9c0ef13bee10..c66e512719ab 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -62,7 +62,6 @@ CONFIG_NET_SCH_NETEM=y CONFIG_NET_CLS_TCINDEX=y CONFIG_NET_CLS_ROUTE4=y CONFIG_NET_CLS_FW=y -CONFIG_NET_CLS_IND=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_BLOCK=y diff --git a/arch/sh/configs/sh2007_defconfig b/arch/sh/configs/sh2007_defconfig index a1cf6447dbb1..cbd6742eb423 100644 --- a/arch/sh/configs/sh2007_defconfig +++ b/arch/sh/configs/sh2007_defconfig @@ -85,7 +85,6 @@ CONFIG_WATCHDOG=y CONFIG_SH_WDT=y CONFIG_SSB=y CONFIG_FB=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index 822fa9e96f74..171ab05ce4fc 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -142,7 +142,6 @@ CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_PEDIT=m -CONFIG_NET_CLS_IND=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_FW_LOADER=m CONFIG_CONNECTOR=m diff --git a/arch/sh/include/asm/flat.h b/arch/sh/include/asm/flat.h index 843d458b8329..fee4f25555cb 100644 --- a/arch/sh/include/asm/flat.h +++ b/arch/sh/include/asm/flat.h @@ -11,11 +11,8 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 0 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { *addr = get_unaligned((__force u32 *)rp); return 0; @@ -25,8 +22,6 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) put_unaligned(addr, (__force u32 *)rp); return 0; } -#define flat_get_relocate_addr(rel) (rel) -#define flat_set_persistent(relval, p) ({ (void)p; 0; }) #define FLAT_PLAT_INIT(_r) \ do { _r->regs[0]=0; _r->regs[1]=0; _r->regs[2]=0; _r->regs[3]=0; \ diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index c28e37a344ad..ac0561960c52 
100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -369,7 +369,11 @@ static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; } #define ioremap_nocache ioremap #define ioremap_uc ioremap -#define iounmap __iounmap + +static inline void iounmap(void __iomem *addr) +{ + __iounmap(addr); +} /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 7d8587eb65ff..779260b721ca 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -38,6 +38,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud) return pud_val(pud); } +/* only used by the stubbed out hugetlb gup code, should never be called */ +#define pud_page(pud) NULL + #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 3587103afe59..9085d1142fa3 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -149,6 +149,43 @@ extern void paging_init(void); extern void page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd); +static inline bool __pte_access_permitted(pte_t pte, u64 prot) +{ + return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot; +} + +#ifdef CONFIG_X2TLB +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + u64 prot = _PAGE_PRESENT; + + prot |= _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ); + if (write) + prot |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE); + return __pte_access_permitted(pte, prot); +} +#elif defined(CONFIG_SUPERH64) +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + u64 prot = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ; + + if (write) + prot |= _PAGE_WRITE; + return __pte_access_permitted(pte, prot); +} +#else +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + u64 prot = _PAGE_PRESENT | _PAGE_USER; + + if (write) + prot |= _PAGE_RW; + return __pte_access_permitted(pte, prot); +} +#endif + +#define pte_access_permitted pte_access_permitted + /* arch/sh/mm/mmap.c */ #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h index 9143c7babcbe..6c89e3e04cee 100644 --- a/arch/sh/include/asm/ptrace.h +++ b/arch/sh/include/asm/ptrace.h @@ -16,8 +16,31 @@ #define user_mode(regs) (((regs)->sr & 0x40000000)==0) #define kernel_stack_pointer(_regs) ((unsigned long)(_regs)->regs[15]) -#define GET_FP(regs) ((regs)->regs[14]) -#define GET_USP(regs) ((regs)->regs[15]) +static inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return regs->pc; +} +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->pc = val; +} + +static inline unsigned long frame_pointer(struct pt_regs *regs) +{ + return regs->regs[14]; +} + +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->regs[15]; +} + +static inline void user_stack_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->regs[15] = val; +} #define arch_has_single_step() (1) @@ -112,7 +135,5 @@ static inline unsigned long profile_pc(struct pt_regs *regs) return pc; } -#define profile_pc profile_pc -#include <asm-generic/ptrace.h> #endif /* __ASM_SH_PTRACE_H */ diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 
74b48db86dd7..0bcff11a4843 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -568,5 +568,5 @@ BUILD_TRAP_HANDLER(fpu_error) return; } - force_sig(SIGFPE, tsk); + force_sig(SIGFPE); } diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index 1ff56e5ba990..03ffd8cdf542 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -421,5 +421,5 @@ BUILD_TRAP_HANDLER(fpu_error) } } - force_sig(SIGFPE, tsk); + force_sig(SIGFPE); } diff --git a/arch/sh/kernel/cpu/sh5/fpu.c b/arch/sh/kernel/cpu/sh5/fpu.c index 9218d9ed787e..3966b5ee8e93 100644 --- a/arch/sh/kernel/cpu/sh5/fpu.c +++ b/arch/sh/kernel/cpu/sh5/fpu.c @@ -100,9 +100,7 @@ void restore_fpu(struct task_struct *tsk) asmlinkage void do_fpu_error(unsigned long ex, struct pt_regs *regs) { - struct task_struct *tsk = current; - regs->pc += 4; - force_sig(SIGFPE, tsk); + force_sig(SIGFPE); } diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index bc96b16288c1..3bd010b4c55f 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -338,7 +338,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) /* Deliver the signal to userspace */ if (!arch_check_bp_in_kernelspace(&bp->hw.info)) { force_sig_fault(SIGTRAP, TRAP_HWBKPT, - (void __user *)NULL, current); + (void __user *)NULL); } rcu_read_unlock(); diff --git a/arch/sh/kernel/kdebugfs.c b/arch/sh/kernel/kdebugfs.c index 95428e05d212..8b505e1556a5 100644 --- a/arch/sh/kernel/kdebugfs.c +++ b/arch/sh/kernel/kdebugfs.c @@ -9,9 +9,6 @@ EXPORT_SYMBOL(arch_debugfs_dir); static int __init arch_kdebugfs_init(void) { arch_debugfs_dir = debugfs_create_dir("sh", NULL); - if (!arch_debugfs_dir) - return -ENOMEM; - return 0; } arch_initcall(arch_kdebugfs_init); diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index 3390349ff976..11085e48eaa6 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -550,7 +550,7 @@ asmlinkage void do_single_step(unsigned long long vec, struct pt_regs *regs) continually stepping. */ local_irq_enable(); regs->sr &= ~SR_SSTEP; - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } /* Called with interrupts disabled */ @@ -561,7 +561,7 @@ BUILD_TRAP_HANDLER(breakpoint) /* We need to forward step the PC, to counteract the backstep done in signal.c. 
*/ local_irq_enable(); - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); regs->pc += 4; } diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 2a2121ba8ebe..24473fa6c3b6 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -176,7 +176,7 @@ asmlinkage int sys_sigreturn(void) return r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -207,7 +207,7 @@ asmlinkage int sys_rt_sigreturn(void) return r0; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index f1f1598879c2..b9aaa9266b34 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -277,7 +277,7 @@ asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3, return (int) ret; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -311,7 +311,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3, return (int) ret; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 016a727d4357..834c9c7d79fa 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -436,3 +436,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 8b49cced663d..63cf17bc760d 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -141,7 +141,7 @@ BUILD_TRAP_HANDLER(debug) SIGTRAP) == NOTIFY_STOP) return; - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } /* @@ -167,7 +167,7 @@ BUILD_TRAP_HANDLER(bug) } #endif - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } BUILD_TRAP_HANDLER(nmi) diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index f2a18b5fafd8..058c6181bb30 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -533,7 +533,7 @@ uspace_segv: "access (PC %lx PR %lx)\n", current->comm, regs->pc, regs->pr); - force_sig_fault(SIGBUS, si_code, (void __user *)address, current); + force_sig_fault(SIGBUS, si_code, (void __user *)address); } else { inc_unaligned_kernel_access(); @@ -603,7 +603,7 @@ asmlinkage void do_divide_error(unsigned long r4) /* Let gcc know unhandled cases don't make it past here */ return; } - force_sig_fault(SIGFPE, code, NULL, current); + force_sig_fault(SIGFPE, code, NULL); } #endif @@ -611,7 +611,6 @@ asmlinkage void do_reserved_inst(void) { struct pt_regs *regs = current_pt_regs(); unsigned long error_code; - struct task_struct *tsk = current; #ifdef CONFIG_SH_FPU_EMU unsigned short inst = 0; @@ -633,7 +632,7 @@ asmlinkage void do_reserved_inst(void) /* Enable DSP mode, and restart instruction. 
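[Editorial note, not part of the patch] A recurring change in the signal and trap handlers above and below is that force_sig(), force_sigsegv() and force_sig_fault() lose their struct task_struct argument and now always act on the calling task. The userspace mock below is only a sketch of that API reshaping; the struct, the current macro and main() are invented for illustration and are not kernel code.

#include <stdio.h>

/* Hypothetical stand-ins for kernel types; for illustration only. */
struct task_struct { const char *comm; };
static struct task_struct demo_task = { "demo" };
#define current (&demo_task)

/* Old shape: void force_sig(int sig, struct task_struct *tsk); callers
 * always passed current.  New shape: the task argument is gone and the
 * helper uses current internally, so call sites shrink to
 * force_sig(SIGSEGV); as in the hunks above and below. */
static void force_sig(int sig)
{
	printf("force signal %d on %s\n", sig, current->comm);
}

int main(void)
{
	force_sig(11);	/* e.g. SIGSEGV for a bad sigreturn frame */
	return 0;
}
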
*/ regs->sr |= SR_DSP; /* Save DSP mode */ - tsk->thread.dsp_status.status |= SR_DSP; + current->thread.dsp_status.status |= SR_DSP; return; } #endif @@ -641,7 +640,7 @@ asmlinkage void do_reserved_inst(void) error_code = lookup_exception_vector(); local_irq_enable(); - force_sig(SIGILL, tsk); + force_sig(SIGILL); die_if_no_fixup("reserved instruction", regs, error_code); } @@ -697,7 +696,6 @@ asmlinkage void do_illegal_slot_inst(void) { struct pt_regs *regs = current_pt_regs(); unsigned long inst; - struct task_struct *tsk = current; if (kprobe_handle_illslot(regs->pc) == 0) return; @@ -716,7 +714,7 @@ asmlinkage void do_illegal_slot_inst(void) inst = lookup_exception_vector(); local_irq_enable(); - force_sig(SIGILL, tsk); + force_sig(SIGILL); die_if_no_fixup("illegal slot instruction", regs, inst); } diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c index 8ce90a7da67d..37046f3a26d3 100644 --- a/arch/sh/kernel/traps_64.c +++ b/arch/sh/kernel/traps_64.c @@ -599,7 +599,7 @@ static void do_unhandled_exception(int signr, char *str, unsigned long error, struct pt_regs *regs) { if (user_mode(regs)) - force_sig(signr, current); + force_sig(signr); die_if_no_fixup(str, regs, error); } diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index a0fa8fc88739..e8be0eca0444 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -560,7 +560,7 @@ static int ieee_fpe_handler(struct pt_regs *regs) task_thread_info(tsk)->status |= TS_USEDFPU; } else { force_sig_fault(SIGFPE, FPE_FLTINV, - (void __user *)regs->pc, tsk); + (void __user *)regs->pc); } regs->pc = nextpc; diff --git a/arch/sh/mm/Makefile b/arch/sh/mm/Makefile index fbe5e79751b3..5051b38fd5b6 100644 --- a/arch/sh/mm/Makefile +++ b/arch/sh/mm/Makefile @@ -17,7 +17,7 @@ cacheops-$(CONFIG_CPU_SHX3) += cache-shx3.o obj-y += $(cacheops-y) mmu-y := nommu.o extable_32.o -mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o gup.o ioremap.o kmap.o \ +mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o ioremap.o kmap.o \ pgtable.o tlbex_$(BITS).o tlbflush_$(BITS).o obj-y += $(mmu-y) diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index e5539e0f8e3b..4c1ca197e9c5 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -63,13 +63,8 @@ static const struct file_operations asids_debugfs_fops = { static int __init asids_debugfs_init(void) { - struct dentry *asids_dentry; - - asids_dentry = debugfs_create_file("asids", S_IRUSR, arch_debugfs_dir, - NULL, &asids_debugfs_fops); - if (!asids_dentry) - return -ENOMEM; - - return PTR_ERR_OR_ZERO(asids_dentry); + debugfs_create_file("asids", S_IRUSR, arch_debugfs_dir, NULL, + &asids_debugfs_fops); + return 0; } device_initcall(asids_debugfs_init); diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c index 4eb9d43578b4..17d780794497 100644 --- a/arch/sh/mm/cache-debugfs.c +++ b/arch/sh/mm/cache-debugfs.c @@ -109,22 +109,10 @@ static const struct file_operations cache_debugfs_fops = { static int __init cache_debugfs_init(void) { - struct dentry *dcache_dentry, *icache_dentry; - - dcache_dentry = debugfs_create_file("dcache", S_IRUSR, arch_debugfs_dir, - (unsigned int *)CACHE_TYPE_DCACHE, - &cache_debugfs_fops); - if (!dcache_dentry) - return -ENOMEM; - - icache_dentry = debugfs_create_file("icache", S_IRUSR, arch_debugfs_dir, - (unsigned int *)CACHE_TYPE_ICACHE, - &cache_debugfs_fops); - if (!icache_dentry) { - debugfs_remove(dcache_dentry); - return -ENOMEM; - } - + debugfs_create_file("dcache", S_IRUSR, arch_debugfs_dir, + (void 
*)CACHE_TYPE_DCACHE, &cache_debugfs_fops); + debugfs_create_file("icache", S_IRUSR, arch_debugfs_dir, + (void *)CACHE_TYPE_ICACHE, &cache_debugfs_fops); return 0; } module_init(cache_debugfs_init); diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 6defd2c6d9b1..3093bc372138 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -39,10 +39,9 @@ static inline int notify_page_fault(struct pt_regs *regs, int trap) } static void -force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk) +force_sig_info_fault(int si_signo, int si_code, unsigned long address) { - force_sig_fault(si_signo, si_code, (void __user *)address, tsk); + force_sig_fault(si_signo, si_code, (void __user *)address); } /* @@ -244,8 +243,6 @@ static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, unsigned long address, int si_code) { - struct task_struct *tsk = current; - /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { /* @@ -253,7 +250,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, */ local_irq_enable(); - force_sig_info_fault(SIGSEGV, si_code, address, tsk); + force_sig_info_fault(SIGSEGV, si_code, address); return; } @@ -308,7 +305,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) if (!user_mode(regs)) no_context(regs, error_code, address); - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); + force_sig_info_fault(SIGBUS, BUS_ADRERR, address); } static noinline int diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c deleted file mode 100644 index 277c882f7489..000000000000 --- a/arch/sh/mm/gup.c +++ /dev/null @@ -1,277 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Lockless get_user_pages_fast for SuperH - * - * Copyright (C) 2009 - 2010 Paul Mundt - * - * Cloned from the x86 and PowerPC versions, by: - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/vmstat.h> -#include <linux/highmem.h> -#include <asm/pgtable.h> - -static inline pte_t gup_get_pte(pte_t *ptep) -{ -#ifndef CONFIG_X2TLB - return READ_ONCE(*ptep); -#else - /* - * With get_user_pages_fast, we walk down the pagetables without - * taking any locks. For this we would like to load the pointers - * atomically, but that is not possible with 64-bit PTEs. What - * we do have is the guarantee that a pte will only either go - * from not present to present, or present to not present or both - * -- it will not switch to a completely different present page - * without a TLB flush in between; something that we are blocking - * by holding interrupts off. - * - * Setting ptes from not present to present goes: - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees l iff pte_high - * sees h. We load pte_high *after* loading pte_low, which ensures we - * don't see an older value of pte_high. *Then* we recheck pte_low, - * which ensures that we haven't picked up a changed pte high. We might - * have got rubbish values from pte_low and pte_high, but we are - * guaranteed that pte_low will not have the present bit set *unless* - * it is 'l'. And get_user_pages_fast only operates on present ptes, so - * we're safe. 
- * - * gup_get_pte should not be used or copied outside gup.c without being - * very careful -- it does not atomically load the pte or anything that - * is likely to be useful for you. - */ - pte_t pte; - -retry: - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - if (unlikely(pte.pte_low != ptep->pte_low)) - goto retry; - - return pte; -#endif -} - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - u64 mask, result; - pte_t *ptep; - -#ifdef CONFIG_X2TLB - result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ); - if (write) - result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE); -#elif defined(CONFIG_SUPERH64) - result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ; - if (write) - result |= _PAGE_WRITE; -#else - result = _PAGE_PRESENT | _PAGE_USER; - if (write) - result |= _PAGE_RW; -#endif - - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_map(&pmd, addr); - do { - pte_t pte = gup_get_pte(ptep); - struct page *page; - - if ((pte_val(pte) & mask) != result) { - pte_unmap(ptep); - return 0; - } - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - page = pte_page(pte); - get_page(page); - __flush_anon_page(page, addr); - flush_dcache_page(page); - pages[*nr] = page; - (*nr)++; - - } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (!gup_pte_range(pmd, addr, next, write, pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -/* - * Like get_user_pages_fast() except its IRQ-safe in that it won't fall - * back to the regular GUP. - * Note a difference with get_user_pages_fast: this always returns the - * number of pages pinned, 0 if no pages were pinned. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - unsigned long flags; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - if (unlikely(!access_ok((void __user *)start, len))) - return 0; - - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables and pages from being freed. 
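[Editorial note, not part of the patch] With arch/sh switching to the generic fast GUP (HAVE_FAST_GUP plus GUP_GET_PTE_LOW_HIGH selected in the Kconfig hunks earlier in this section), the mask/result test open-coded in the deleted gup_pte_range() above is now expressed through the pte_access_permitted() helpers added to pgtable.h. The snippet below is a userspace sketch of that check only; the bit values are invented and do not match the real SuperH PTE layout.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical bit values, for illustration only. */
#define _PAGE_PRESENT	0x001ULL
#define _PAGE_USER	0x002ULL
#define _PAGE_RW	0x004ULL
#define _PAGE_SPECIAL	0x100ULL

/* Mirrors the shape of the new pte_access_permitted(): every required
 * protection bit must be set, and masking with (prot | _PAGE_SPECIAL)
 * while comparing against prot rejects special mappings, just as the
 * old gup_pte_range() mask/result pair did. */
static bool pte_access_permitted(uint64_t pte_val, bool write)
{
	uint64_t prot = _PAGE_PRESENT | _PAGE_USER;

	if (write)
		prot |= _PAGE_RW;
	return (pte_val & (prot | _PAGE_SPECIAL)) == prot;
}

int main(void)
{
	/* readable user pte -> permitted */
	printf("%d\n", pte_access_permitted(_PAGE_PRESENT | _PAGE_USER, false));
	/* special mapping -> rejected */
	printf("%d\n", pte_access_permitted(_PAGE_PRESENT | _PAGE_USER | _PAGE_SPECIAL, false));
	/* write requested but no write bit -> rejected */
	printf("%d\n", pte_access_permitted(_PAGE_PRESENT | _PAGE_USER, true));
	return 0;
}
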
- */ - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -/** - * get_user_pages_fast() - pin user pages in memory - * @start: starting user address - * @nr_pages: number of pages from start to pin - * @gup_flags: flags modifying pin behaviour - * @pages: array that receives pointers to the pages pinned. - * Should be at least nr_pages long. - * - * Attempt to pin user pages in memory without taking mm->mmap_sem. - * If not successful, it will fall back to taking the lock and - * calling get_user_pages(). - * - * Returns number of pages pinned. This may be fewer than the number - * requested. If nr_pages is 0 or negative, returns 0. If no pages - * were pinned, returns -errno. - */ -int get_user_pages_fast(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int nr = 0; - - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - - end = start + len; - if (end < start) - goto slow_irqon; - - local_irq_disable(); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, - pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - local_irq_enable(); - - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; - - { - int ret; - -slow: - local_irq_enable(); -slow_irqon: - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, pages, - gup_flags); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - - return ret; - } -} diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index a53a040d0054..b59bad86b31e 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -861,13 +861,8 @@ static const struct file_operations pmb_debugfs_fops = { static int __init pmb_debugfs_init(void) { - struct dentry *dentry; - - dentry = debugfs_create_file("pmb", S_IFREG | S_IRUGO, - arch_debugfs_dir, NULL, &pmb_debugfs_fops); - if (!dentry) - return -ENOMEM; - + debugfs_create_file("pmb", S_IFREG | S_IRUGO, arch_debugfs_dir, NULL, + &pmb_debugfs_fops); return 0; } subsys_initcall(pmb_debugfs_init); diff --git a/arch/sh/mm/tlb-debugfs.c b/arch/sh/mm/tlb-debugfs.c index dea637a09246..11c6148283f3 100644 --- a/arch/sh/mm/tlb-debugfs.c +++ b/arch/sh/mm/tlb-debugfs.c @@ -149,22 +149,10 @@ static const struct file_operations tlb_debugfs_fops = { static int __init tlb_debugfs_init(void) { - struct dentry *itlb, *utlb; - - itlb = debugfs_create_file("itlb", S_IRUSR, arch_debugfs_dir, - (unsigned int *)TLB_TYPE_ITLB, - &tlb_debugfs_fops); - if (unlikely(!itlb)) - return -ENOMEM; - - utlb = debugfs_create_file("utlb", S_IRUSR, arch_debugfs_dir, - (unsigned int *)TLB_TYPE_UTLB, - &tlb_debugfs_fops); - if (unlikely(!utlb)) { - debugfs_remove(itlb); - return -ENOMEM; - } - + debugfs_create_file("itlb", S_IRUSR, arch_debugfs_dir, + (void *)TLB_TYPE_ITLB, &tlb_debugfs_fops); + debugfs_create_file("utlb", S_IRUSR, arch_debugfs_dir, + (void *)TLB_TYPE_UTLB, &tlb_debugfs_fops); return 
0; } module_init(tlb_debugfs_init); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 26ab6f5bbaaf..e9f5d62e9817 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -28,6 +28,7 @@ config SPARC select RTC_DRV_M48T59 select RTC_SYSTOHC select HAVE_ARCH_JUMP_LABEL if SPARC64 + select HAVE_FAST_GUP if SPARC64 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_PCI_IOMAP @@ -300,9 +301,6 @@ config NODES_SPAN_OTHER_NODES def_bool y depends on NEED_MULTIPLE_NODES -config ARCH_SELECT_MEMORY_MODEL - def_bool y if SPARC64 - config ARCH_SPARSEMEM_ENABLE def_bool y if SPARC64 select SPARSEMEM_VMEMMAP_ENABLE diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig index 2d4f34c52c67..7b3efe5edc1a 100644 --- a/arch/sparc/configs/sparc32_defconfig +++ b/arch/sparc/configs/sparc32_defconfig @@ -27,7 +27,6 @@ CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NET_PKTGEN=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_RAM=y diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index ea547d596fcf..6c325d53a20a 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -57,7 +57,6 @@ CONFIG_IPV6_TUNNEL=m CONFIG_VLAN_8021Q=m CONFIG_NET_PKTGEN=m CONFIG_NET_TCPPROBE=m -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_PREVENT_FIRMWARE_BUILD is not set CONFIG_CONNECTOR=m CONFIG_BLK_DEV_LOOP=m diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 6963482c81d8..b60448397d4f 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -23,15 +23,15 @@ #define ATOMIC_OP(op) \ void atomic_##op(int, atomic_t *); \ -void atomic64_##op(long, atomic64_t *); +void atomic64_##op(s64, atomic64_t *); #define ATOMIC_OP_RETURN(op) \ int atomic_##op##_return(int, atomic_t *); \ -long atomic64_##op##_return(long, atomic64_t *); +s64 atomic64_##op##_return(s64, atomic64_t *); #define ATOMIC_FETCH_OP(op) \ int atomic_fetch_##op(int, atomic_t *); \ -long atomic64_fetch_##op(long, atomic64_t *); +s64 atomic64_fetch_##op(s64, atomic64_t *); #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op) @@ -61,7 +61,7 @@ static inline int atomic_xchg(atomic_t *v, int new) ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) -long atomic64_dec_if_positive(atomic64_t *v); +s64 atomic64_dec_if_positive(atomic64_t *v); #define atomic64_dec_if_positive atomic64_dec_if_positive #endif /* !(__ARCH_SPARC64_ATOMIC__) */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 22500c3be7a9..1599de730532 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -864,6 +864,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud) #define pgd_present(pgd) (pgd_val(pgd) != 0U) #define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL) +/* only used by the stubbed out hugetlb gup code, should never be called */ +#define pgd_page(pgd) NULL + static inline unsigned long pud_large(pud_t pud) { pte_t pte = __pte(pud_val(pud)); @@ -1075,6 +1078,46 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, } #define io_remap_pfn_range io_remap_pfn_range +static inline unsigned long untagged_addr(unsigned long start) +{ + if (adi_capable()) { + long addr = start; + + /* If userspace has passed a versioned address, 
kernel + * will not find it in the VMAs since it does not store + * the version tags in the list of VMAs. Storing version + * tags in list of VMAs is impractical since they can be + * changed any time from userspace without dropping into + * kernel. Any address search in VMAs will be done with + * non-versioned addresses. Ensure the ADI version bits + * are dropped here by sign extending the last bit before + * ADI bits. IOMMU does not implement version tags. + */ + return (addr << (long)adi_nbits()) >> (long)adi_nbits(); + } + + return start; +} +#define untagged_addr untagged_addr + +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + u64 prot; + + if (tlb_type == hypervisor) { + prot = _PAGE_PRESENT_4V | _PAGE_P_4V; + if (write) + prot |= _PAGE_WRITE_4V; + } else { + prot = _PAGE_PRESENT_4U | _PAGE_P_4U; + if (write) + prot |= _PAGE_WRITE_4U; + } + + return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot; +} +#define pte_access_permitted pte_access_permitted + #include <asm/tlbflush.h> #include <asm-generic/pgtable.h> diff --git a/arch/sparc/include/uapi/asm/openpromio.h b/arch/sparc/include/uapi/asm/openpromio.h index 8817f7d1a70c..d4494b679e99 100644 --- a/arch/sparc/include/uapi/asm/openpromio.h +++ b/arch/sparc/include/uapi/asm/openpromio.h @@ -4,7 +4,6 @@ #include <linux/compiler.h> #include <linux/ioctl.h> -#include <linux/types.h> /* * SunOS and Solaris /dev/openprom definitions. The ioctl values @@ -13,7 +12,7 @@ struct openpromio { - u_int oprom_size; /* Actual size of the oprom_array. */ + unsigned int oprom_size; /* Actual size of the oprom_array. */ char oprom_array[1]; /* Holds property names and values. */ }; diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 9265a9eece15..8029b681fc7c 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -115,6 +115,8 @@ #define SO_RCVTIMEO_NEW 0x0044 #define SO_SNDTIMEO_NEW 0x0045 +#define SO_DETACH_REUSEPORT_BPF 0x0047 + #if !defined(__KERNEL__) diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 59eaf6227af1..4282116e28e7 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -519,7 +519,7 @@ void synchronize_user_stack(void) static void stack_unaligned(unsigned long sp) { - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0); } static const char uwfault32[] = KERN_INFO \ @@ -570,7 +570,7 @@ void fault_in_user_windows(struct pt_regs *regs) barf: set_thread_wsaved(window + 1); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } asmlinkage long sparc_do_fork(unsigned long clone_flags, diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index e800ce13cc6e..a237810aa9f4 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -170,7 +170,7 @@ void do_sigreturn32(struct pt_regs *regs) return; segv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) @@ -256,7 +256,7 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) set_current_blocked(&set); return; segv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) @@ -375,7 +375,7 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs, pr_info("%s[%d] bad frame in setup_frame32: %08lx TPC %08lx O7 %08lx\n", current->comm, 
current->pid, (unsigned long)sf, regs->tpc, regs->u_regs[UREG_I7]); - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return -EINVAL; } @@ -509,7 +509,7 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs, pr_info("%s[%d] bad frame in setup_rt_frame32: %08lx TPC %08lx O7 %08lx\n", current->comm, current->pid, (unsigned long)sf, regs->tpc, regs->u_regs[UREG_I7]); - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return -EINVAL; } diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 83953780ca01..42c3de313fd6 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -137,7 +137,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) return; segv_and_exit: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } asmlinkage void do_rt_sigreturn(struct pt_regs *regs) @@ -196,7 +196,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) set_current_blocked(&set); return; segv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index ca70787efd8e..69ae814b7e90 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -134,7 +134,7 @@ out: exception_exit(prev_state); return; do_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -228,7 +228,7 @@ out: exception_exit(prev_state); return; do_sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); goto out; } @@ -320,7 +320,7 @@ void do_rt_sigreturn(struct pt_regs *regs) set_current_blocked(&set); return; segv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize) @@ -374,7 +374,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) pr_info("%s[%d] bad frame in setup_rt_frame: %016lx TPC %016lx O7 %016lx\n", current->comm, current->pid, (unsigned long)sf, regs->tpc, regs->u_regs[UREG_I7]); - force_sigsegv(ksig->sig, current); + force_sigsegv(ksig->sig); return -EINVAL; } diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index 452e4d080855..be77538bc038 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -151,7 +151,7 @@ sparc_breakpoint (struct pt_regs *regs) #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Entering kernel PC=%x, nPC=%x\n", regs->pc, regs->npc); #endif - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0); #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Returning to space: PC=%x nPC=%x\n", regs->pc, regs->npc); diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 9825ca6a6020..ccc88926bc00 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -511,7 +511,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs) #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc); #endif - force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0); #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc); #endif diff --git a/arch/sparc/kernel/syscalls/syscall.tbl 
b/arch/sparc/kernel/syscalls/syscall.tbl index e047480b1605..c58e71f21129 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -479,3 +479,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index bcdfc6168dd5..4ceecad556a9 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -103,7 +103,7 @@ void do_hw_interrupt(struct pt_regs *regs, unsigned long type) die_if_kernel("Kernel bad trap", regs); force_sig_fault(SIGILL, ILL_ILLTRP, - (void __user *)regs->pc, type - 0x80, current); + (void __user *)regs->pc, type - 0x80); } void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc, @@ -327,7 +327,7 @@ void handle_reg_access(struct pt_regs *regs, unsigned long pc, unsigned long npc printk("Register Access Exception at PC %08lx NPC %08lx PSR %08lx\n", pc, npc, psr); #endif - force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0, current); + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0); } void handle_cp_disabled(struct pt_regs *regs, unsigned long pc, unsigned long npc, diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 04aa588d5dd1..27778b65a965 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -108,7 +108,7 @@ void bad_trap(struct pt_regs *regs, long lvl) regs->tnpc &= 0xffffffff; } force_sig_fault(SIGILL, ILL_ILLTRP, - (void __user *)regs->tpc, lvl, current); + (void __user *)regs->tpc, lvl); } void bad_trap_tl1(struct pt_regs *regs, long lvl) @@ -202,7 +202,7 @@ void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, un regs->tnpc &= 0xffffffff; } force_sig_fault(SIGSEGV, SEGV_MAPERR, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); out: exception_exit(prev_state); } @@ -237,7 +237,7 @@ void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsig regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0); } void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) @@ -322,7 +322,7 @@ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, un if (is_no_fault_exception(regs)) return; - force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0); out: exception_exit(prev_state); } @@ -386,16 +386,13 @@ void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsig */ switch (type) { case HV_FAULT_TYPE_INV_ASI: - force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0, - current); + force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0); break; case HV_FAULT_TYPE_MCD_DIS: - force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0, - current); + force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0); break; default: - force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0, - current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0); break; } } @@ -572,7 +569,7 @@ static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned lon regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0, current); + 
force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0); } void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar) @@ -2074,7 +2071,7 @@ void do_mcd_err(struct pt_regs *regs, struct sun4v_error_entry ent) * code */ force_sig_fault(SIGSEGV, SEGV_ADIDERR, (void __user *)ent.err_raddr, - 0, current); + 0); } /* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate. @@ -2182,13 +2179,13 @@ bool sun4v_nonresum_error_user_handled(struct pt_regs *regs, addr += PAGE_SIZE; } } - force_sig(SIGKILL, current); + force_sig(SIGKILL); return true; } if (attrs & SUN4V_ERR_ATTRS_PIO) { force_sig_fault(SIGBUS, BUS_ADRERR, - (void __user *)sun4v_get_vaddr(regs), 0, current); + (void __user *)sun4v_get_vaddr(regs), 0); return true; } @@ -2345,7 +2342,7 @@ static void do_fpe_common(struct pt_regs *regs) code = FPE_FLTRES; } force_sig_fault(SIGFPE, code, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); } } @@ -2400,7 +2397,7 @@ void do_tof(struct pt_regs *regs) regs->tnpc &= 0xffffffff; } force_sig_fault(SIGEMT, EMT_TAGOVF, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); out: exception_exit(prev_state); } @@ -2420,7 +2417,7 @@ void do_div0(struct pt_regs *regs) regs->tnpc &= 0xffffffff; } force_sig_fault(SIGFPE, FPE_INTDIV, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); out: exception_exit(prev_state); } @@ -2616,7 +2613,7 @@ void do_illegal_instruction(struct pt_regs *regs) } } } - force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0); out: exception_exit(prev_state); } @@ -2636,7 +2633,7 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo if (is_no_fault_exception(regs)) return; - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0); out: exception_exit(prev_state); } @@ -2654,7 +2651,7 @@ void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_c if (is_no_fault_exception(regs)) return; - force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0); } /* sun4v_mem_corrupt_detect_precise() - Handle precise exception on an ADI @@ -2701,7 +2698,7 @@ void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs, unsigned long addr, regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0, current); + force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0); } void do_privop(struct pt_regs *regs) @@ -2717,7 +2714,7 @@ void do_privop(struct pt_regs *regs) regs->tnpc &= 0xffffffff; } force_sig_fault(SIGILL, ILL_PRVOPC, - (void __user *)regs->tpc, 0, current); + (void __user *)regs->tpc, 0); out: exception_exit(prev_state); } diff --git a/arch/sparc/lib/COPYING.LIB b/arch/sparc/lib/COPYING.LIB deleted file mode 100644 index eb685a5ec981..000000000000 --- a/arch/sparc/lib/COPYING.LIB +++ /dev/null @@ -1,481 +0,0 @@ - GNU LIBRARY GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1991 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the library GPL. It is - numbered 2 because it goes with version 2 of the ordinary GPL.] 
- - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Library General Public License, applies to some -specially designated Free Software Foundation software, and to any -other libraries whose authors decide to use it. You can use it for -your libraries, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if -you distribute copies of the library, or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. You must make sure that they, too, receive or can get the source -code. If you link a program with the library, you must provide -complete object files to the recipients so that they can relink them -with the library, after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - Our method of protecting your rights has two steps: (1) copyright -the library, and (2) offer you this license which gives you legal -permission to copy, distribute and/or modify the library. - - Also, for each distributor's protection, we want to make certain -that everyone understands that there is no warranty for this free -library. If the library is modified by someone else and passed on, we -want its recipients to know that what they have is not the original -version, so that any problems introduced by others will not reflect on -the original authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that companies distributing free -software will individually obtain patent licenses, thus in effect -transforming the program into proprietary software. To prevent this, -we have made it clear that any patent must be licensed for everyone's -free use or not licensed at all. - - Most GNU software, including some libraries, is covered by the ordinary -GNU General Public License, which was designed for utility programs. This -license, the GNU Library General Public License, applies to certain -designated libraries. This license is quite different from the ordinary -one; be sure to read it in full, and don't assume that anything in it is -the same as in the ordinary license. - - The reason we have a separate public license for some libraries is that -they blur the distinction we usually make between modifying or adding to a -program and simply using it. Linking a program with a library, without -changing the library, is in some sense simply using the library, and is -analogous to running a utility program or application program. 
However, in -a textual and legal sense, the linked executable is a combined work, a -derivative of the original library, and the ordinary General Public License -treats it as such. - - Because of this blurred distinction, using the ordinary General -Public License for libraries did not effectively promote software -sharing, because most developers did not use the libraries. We -concluded that weaker conditions might promote sharing better. - - However, unrestricted linking of non-free programs would deprive the -users of those programs of all benefit from the free status of the -libraries themselves. This Library General Public License is intended to -permit developers of non-free programs to use free libraries, while -preserving your freedom as a user of such programs to change the free -libraries that are incorporated in them. (We have not seen how to achieve -this as regards changes in header files, but we have achieved it as regards -changes in the actual functions of the Library.) The hope is that this -will lead to faster development of free libraries. - - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, while the latter only -works together with the library. - - Note that it is possible for a library to be covered by the ordinary -General Public License rather than by this special one. - - GNU LIBRARY GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License Agreement applies to any software library which -contains a notice placed by the copyright holder or other authorized -party saying it may be distributed under the terms of this Library -General Public License (also called "this License"). Each licensee is -addressed as "you". - - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. (Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. 
You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. - - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - - 2. You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. - - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. - - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) 
Do not make any other change in -these notices. - - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. - - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. -Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. - - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - - 6. As an exception to the Sections above, you may also compile or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. - - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. 
If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. (It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. - - c) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - d) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. However, as a special exception, -the source code distributed need not include anything that is normally -distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - - 7. You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. 
You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. The Free Software Foundation may publish revised and/or new -versions of the Library General Public License from time to time. -Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. 
If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - - 14. If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. - - END OF TERMS AND CONDITIONS - - Appendix: How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - - To apply these terms, attach the following notices to the library. It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - - <one line to give the library's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. 
- - You should have received a copy of the GNU Library General Public - License along with this library; if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random Hacker. - - <signature of Ty Coon>, 1 April 1990 - Ty Coon, President of Vice - -That's all there is to it! diff --git a/arch/sparc/lib/NG4clear_page.S b/arch/sparc/lib/NG4clear_page.S index 97e2678d042a..d91d6b5f2444 100644 --- a/arch/sparc/lib/NG4clear_page.S +++ b/arch/sparc/lib/NG4clear_page.S @@ -27,4 +27,4 @@ NG4clear_user_page: /* %o0=dest, %o1=vaddr */ retl nop .size NG4clear_page,.-NG4clear_page - .size NG4clear_user_page,.-NG4clear_user_page
\ No newline at end of file + .size NG4clear_user_page,.-NG4clear_user_page diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile index d39075b1e3b7..b078205b70e0 100644 --- a/arch/sparc/mm/Makefile +++ b/arch/sparc/mm/Makefile @@ -5,7 +5,7 @@ asflags-y := -ansi ccflags-y := -Werror -obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o gup.o +obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o obj-y += fault_$(BITS).o obj-y += init_$(BITS).o obj-$(CONFIG_SPARC32) += extable.o srmmu.o iommu.o io-unit.o diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index b0440b0edd97..8d69de111470 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -131,7 +131,7 @@ static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs, show_signal_msg(regs, sig, code, addr, current); - force_sig_fault(sig, code, (void __user *) addr, 0, current); + force_sig_fault(sig, code, (void __user *) addr, 0); } static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault) @@ -425,7 +425,7 @@ do_sigbus: static void check_stack_aligned(unsigned long sp) { if (sp & 0x7UL) - force_sig(SIGILL, current); + force_sig(SIGILL); } void window_overflow_fault(void) diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 8f8a604c1300..83fda4d9c3b2 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -187,7 +187,7 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, if (unlikely(show_unhandled_signals)) show_signal_msg(regs, sig, code, addr, current); - force_sig_fault(sig, code, (void __user *) addr, 0, current); + force_sig_fault(sig, code, (void __user *) addr, 0); } static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn) diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c deleted file mode 100644 index 1e770a517d4a..000000000000 --- a/arch/sparc/mm/gup.c +++ /dev/null @@ -1,340 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Lockless get_user_pages_fast for sparc, cribbed from powerpc - * - * Copyright (C) 2008 Nick Piggin - * Copyright (C) 2008 Novell Inc. - */ - -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/vmstat.h> -#include <linux/pagemap.h> -#include <linux/rwsem.h> -#include <asm/pgtable.h> -#include <asm/adi.h> - -/* - * The performance critical leaf functions are made noinline otherwise gcc - * inlines everything into a single function which results in too much - * register pressure. - */ -static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) -{ - unsigned long mask, result; - pte_t *ptep; - - if (tlb_type == hypervisor) { - result = _PAGE_PRESENT_4V|_PAGE_P_4V; - if (write) - result |= _PAGE_WRITE_4V; - } else { - result = _PAGE_PRESENT_4U|_PAGE_P_4U; - if (write) - result |= _PAGE_WRITE_4U; - } - mask = result | _PAGE_SPECIAL; - - ptep = pte_offset_kernel(&pmd, addr); - do { - struct page *page, *head; - pte_t pte = *ptep; - - if ((pte_val(pte) & mask) != result) - return 0; - VM_BUG_ON(!pfn_valid(pte_pfn(pte))); - - /* The hugepage case is simplified on sparc64 because - * we encode the sub-page pfn offsets into the - * hugepage PTEs. We could optimize this in the future - * use page_cache_add_speculative() for the hugepage case. 
- */ - page = pte_page(pte); - head = compound_head(page); - if (!page_cache_get_speculative(head)) - return 0; - if (unlikely(pte_val(pte) != pte_val(*ptep))) { - put_page(head); - return 0; - } - - pages[*nr] = page; - (*nr)++; - } while (ptep++, addr += PAGE_SIZE, addr != end); - - return 1; -} - -static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, - unsigned long end, int write, struct page **pages, - int *nr) -{ - struct page *head, *page; - int refs; - - if (!(pmd_val(pmd) & _PAGE_VALID)) - return 0; - - if (write && !pmd_write(pmd)) - return 0; - - refs = 0; - page = pmd_page(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - head = compound_head(page); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) { - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - -static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, - unsigned long end, int write, struct page **pages, - int *nr) -{ - struct page *head, *page; - int refs; - - if (!(pud_val(pud) & _PAGE_VALID)) - return 0; - - if (write && !pud_write(pud)) - return 0; - - refs = 0; - page = pud_page(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - head = compound_head(page); - do { - VM_BUG_ON(compound_head(page) != head); - pages[*nr] = page; - (*nr)++; - page++; - refs++; - } while (addr += PAGE_SIZE, addr != end); - - if (!page_cache_add_speculative(head, refs)) { - *nr -= refs; - return 0; - } - - if (unlikely(pud_val(pud) != pud_val(*pudp))) { - *nr -= refs; - while (refs--) - put_page(head); - return 0; - } - - return 1; -} - -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pmd_t *pmdp; - - pmdp = pmd_offset(&pud, addr); - do { - pmd_t pmd = *pmdp; - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - return 0; - if (unlikely(pmd_large(pmd))) { - if (!gup_huge_pmd(pmdp, pmd, addr, next, - write, pages, nr)) - return 0; - } else if (!gup_pte_range(pmd, addr, next, write, - pages, nr)) - return 0; - } while (pmdp++, addr = next, addr != end); - - return 1; -} - -static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, - int write, struct page **pages, int *nr) -{ - unsigned long next; - pud_t *pudp; - - pudp = pud_offset(&pgd, addr); - do { - pud_t pud = *pudp; - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - return 0; - if (unlikely(pud_large(pud))) { - if (!gup_huge_pud(pudp, pud, addr, next, - write, pages, nr)) - return 0; - } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) - return 0; - } while (pudp++, addr = next, addr != end); - - return 1; -} - -/* - * Note a difference with get_user_pages_fast: this always returns the - * number of pages pinned, 0 if no pages were pinned. - */ -int __get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next, flags; - pgd_t *pgdp; - int nr = 0; - -#ifdef CONFIG_SPARC64 - if (adi_capable()) { - long addr = start; - - /* If userspace has passed a versioned address, kernel - * will not find it in the VMAs since it does not store - * the version tags in the list of VMAs. 
Storing version - * tags in list of VMAs is impractical since they can be - * changed any time from userspace without dropping into - * kernel. Any address search in VMAs will be done with - * non-versioned addresses. Ensure the ADI version bits - * are dropped here by sign extending the last bit before - * ADI bits. IOMMU does not implement version tags. - */ - addr = (addr << (long)adi_nbits()) >> (long)adi_nbits(); - start = addr; - } -#endif - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - local_irq_save(flags); - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - break; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) - break; - } while (pgdp++, addr = next, addr != end); - local_irq_restore(flags); - - return nr; -} - -int get_user_pages_fast(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages) -{ - struct mm_struct *mm = current->mm; - unsigned long addr, len, end; - unsigned long next; - pgd_t *pgdp; - int nr = 0; - -#ifdef CONFIG_SPARC64 - if (adi_capable()) { - long addr = start; - - /* If userspace has passed a versioned address, kernel - * will not find it in the VMAs since it does not store - * the version tags in the list of VMAs. Storing version - * tags in list of VMAs is impractical since they can be - * changed any time from userspace without dropping into - * kernel. Any address search in VMAs will be done with - * non-versioned addresses. Ensure the ADI version bits - * are dropped here by sign extending the last bit before - * ADI bits. IOMMU does not implements version tags, - */ - addr = (addr << (long)adi_nbits()) >> (long)adi_nbits(); - start = addr; - } -#endif - start &= PAGE_MASK; - addr = start; - len = (unsigned long) nr_pages << PAGE_SHIFT; - end = start + len; - - /* - * XXX: batch / limit 'nr', to avoid large irq off latency - * needs some instrumenting to determine the common sizes used by - * important workloads (eg. DB2), and whether limiting the batch size - * will decrease performance. - * - * It seems like we're in the clear for the moment. Direct-IO is - * the main guy that batches up lots of get_user_pages, and even - * they are limited to 64-at-a-time which is not so many. - */ - /* - * This doesn't prevent pagetable teardown, but does prevent - * the pagetables from being freed on sparc. - * - * So long as we atomically load page table pointers versus teardown, - * we can follow the address down to the the page and take a ref on it. 
- */ - local_irq_disable(); - - pgdp = pgd_offset(mm, addr); - do { - pgd_t pgd = *pgdp; - - next = pgd_addr_end(addr, end); - if (pgd_none(pgd)) - goto slow; - if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, - pages, &nr)) - goto slow; - } while (pgdp++, addr = next, addr != end); - - local_irq_enable(); - - VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); - return nr; - - { - int ret; - -slow: - local_irq_enable(); - - /* Try to get the remaining pages with get_user_pages */ - start += nr << PAGE_SHIFT; - pages += nr; - - ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, pages, - gup_flags); - - /* Have to be a bit careful with return values */ - if (nr > 0) { - if (ret < 0) - ret = nr; - else - ret += nr; - } - - return ret; - } -} diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 65428e79b2f3..3364e2a00989 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -908,6 +908,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: emit_alu3_K(SRL, src, 0, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_MOV | BPF_X: emit_reg_move(src, dst, ctx); @@ -942,6 +944,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case BPF_ALU | BPF_DIV | BPF_X: emit_write_y(G0, ctx); emit_alu(DIV, src, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_DIV | BPF_X: emit_alu(UDIVX, src, dst, ctx); @@ -975,6 +979,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_RSH | BPF_X: emit_alu(SRL, src, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_RSH | BPF_X: emit_alu(SRLX, src, dst, ctx); @@ -997,9 +1003,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) case 16: emit_alu_K(SLL, dst, 16, ctx); emit_alu_K(SRL, dst, 16, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case 32: - emit_alu_K(SRL, dst, 0, ctx); + if (!ctx->prog->aux->verifier_zext) + emit_alu_K(SRL, dst, 0, ctx); break; case 64: /* nop */ @@ -1021,6 +1030,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_alu3_K(AND, dst, 0xff, dst, ctx); emit_alu3_K(SLL, tmp, 8, tmp, ctx); emit_alu(OR, tmp, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case 32: @@ -1037,6 +1048,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_alu3_K(AND, dst, 0xff, dst, ctx); /* dst = dst & 0xff */ emit_alu3_K(SLL, dst, 24, dst, ctx); /* dst = dst << 24 */ emit_alu(OR, tmp, dst, ctx); /* dst = dst | tmp */ + if (insn_is_zext(&insn[1])) + return 1; break; case 64: @@ -1050,6 +1063,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) /* dst = imm */ case BPF_ALU | BPF_MOV | BPF_K: emit_loadimm32(imm, dst, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_MOV | BPF_K: emit_loadimm_sext(imm, dst, ctx); @@ -1132,6 +1147,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_RSH | BPF_K: emit_alu_K(SRL, dst, imm, ctx); + if (insn_is_zext(&insn[1])) + return 1; break; case BPF_ALU64 | BPF_RSH | BPF_K: emit_alu_K(SRLX, dst, imm, ctx); @@ -1144,7 +1161,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; do_alu32_trunc: - if (BPF_CLASS(code) == BPF_ALU) + if (BPF_CLASS(code) == BPF_ALU && + !ctx->prog->aux->verifier_zext) 
emit_alu_K(SRL, dst, 0, ctx); break; @@ -1265,6 +1283,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) rs2 = RS2(tmp); } emit(opcode | RS1(src) | rs2 | RD(dst), ctx); + if (opcode != LD64 && insn_is_zext(&insn[1])) + return 1; break; } /* ST: *(size *)(dst + off) = imm */ @@ -1432,6 +1452,11 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = 0x91d02005; /* ta 5 */ } +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct sparc64_jit_data { struct bpf_binary_header *header; u8 *image; diff --git a/arch/um/Makefile b/arch/um/Makefile index 273130cf91d1..d2daa206872d 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -73,7 +73,7 @@ KBUILD_AFLAGS += $(ARCH_INCLUDE) USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -I%,,$(KBUILD_CFLAGS))) \ $(ARCH_INCLUDE) $(MODE_INCLUDE) $(filter -I%,$(CFLAGS)) \ -D_FILE_OFFSET_BITS=64 -idirafter $(srctree)/include \ - -idirafter $(obj)/include -D__KERNEL__ -D__UM_HOST__ + -idirafter $(objtree)/include -D__KERNEL__ -D__UM_HOST__ #This will adjust *FLAGS accordingly to the platform. include $(ARCH_DIR)/Makefile-os-$(OS) diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 99eb5682792a..d7b282e9c4d5 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -10,6 +10,8 @@ #include <linux/mm.h> +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + #define pmd_populate_kernel(mm, pmd, pte) \ set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte))) @@ -25,20 +27,6 @@ extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -extern pte_t *pte_alloc_one_kernel(struct mm_struct *); -extern pgtable_t pte_alloc_one(struct mm_struct *); - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long) pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - #define __pte_free_tlb(tlb,pte, address) \ do { \ pgtable_page_dtor(pte); \ diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index a43d42bf0a86..783b9247161f 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -32,7 +32,7 @@ void flush_thread(void) if (ret) { printk(KERN_ERR "flush_thread - clearing address space failed, " "err = %d\n", ret); - force_sig(SIGKILL, current); + force_sig(SIGKILL); } get_safe_registers(current_pt_regs()->regs.gp, current_pt_regs()->regs.fp); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index a9c9a94c096f..de58e976b9bc 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -208,28 +208,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_page((unsigned long) pgd); } -pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - pte_t *pte; - - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); - return pte; -} - -pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - - pte = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (!pte) - return NULL; - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; -} - #ifdef CONFIG_3_LEVEL_PGTABLES pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) { diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 5f47422401e1..da1e96b1ec3e 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -112,13 +112,12 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs, 
- int error_code) +static void send_sigtrap(struct uml_pt_regs *regs, int error_code) { /* Send us the fake SIGTRAP */ force_sig_fault(SIGTRAP, TRAP_BRKPT, /* User-mode eip? */ - UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL, tsk); + UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL); } /* @@ -147,7 +146,7 @@ void syscall_trace_leave(struct pt_regs *regs) /* Fake a debug trap */ if (ptraced & PT_DTRACE) - send_sigtrap(current, ®s->regs, 0); + send_sigtrap(®s->regs, 0); if (!test_thread_flag(TIF_SYSCALL_TRACE)) return; diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 7a1f2a936fd1..29e7f5f9f188 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -119,7 +119,7 @@ void uml_setup_stubs(struct mm_struct *mm) return; out: - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); } void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 8347161c2ae0..45f739bf302f 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -329,7 +329,7 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, "process: %d\n", task_tgid_vnr(current)); /* We are under mmap_sem, release it such that current can terminate */ up_write(¤t->mm->mmap_sem); - force_sig(SIGKILL, current); + force_sig(SIGKILL); do_signal(¤t->thread.regs); } } @@ -487,7 +487,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) kill: printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address); - force_sig(SIGKILL, current); + force_sig(SIGKILL); } pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 0e8b6158f224..58fe36856182 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -163,13 +163,12 @@ static void show_segv_info(struct uml_pt_regs *regs) static void bad_segv(struct faultinfo fi, unsigned long ip) { current->thread.arch.faultinfo = fi; - force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi), - current); + force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi)); } void fatal_sigsegv(void) { - force_sigsegv(SIGSEGV, current); + force_sigsegv(SIGSEGV); do_signal(¤t->thread.regs); /* * This is to tell gcc that we're not returning - do_signal @@ -268,13 +267,11 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, if (err == -EACCES) { current->thread.arch.faultinfo = fi; - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, - current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } else { BUG_ON(err != -EFAULT); current->thread.arch.faultinfo = fi; - force_sig_fault(SIGSEGV, si_code, (void __user *) address, - current); + force_sig_fault(SIGSEGV, si_code, (void __user *) address); } out: @@ -304,12 +301,11 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) { struct faultinfo *fi = UPT_FAULTINFO(regs); current->thread.arch.faultinfo = *fi; - force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi), - current); + force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi)); } else { printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n", sig, code, err); - force_sig(sig, current); + force_sig(sig); } } diff --git a/arch/unicore32/Makefile b/arch/unicore32/Makefile index 98a5ca43ae87..390819947c37 100644 --- a/arch/unicore32/Makefile +++ b/arch/unicore32/Makefile @@ -41,8 +41,7 @@ libs-y 
+= arch/unicore32/lib/ boot := arch/unicore32/boot -# Default defconfig and target when executing plain make -KBUILD_DEFCONFIG := $(ARCH)_defconfig +# Default target when executing plain make KBUILD_IMAGE := $(boot)/zImage all: zImage diff --git a/arch/unicore32/configs/unicore32_defconfig b/arch/unicore32/configs/defconfig index 360cc9abcdb0..360cc9abcdb0 100644 --- a/arch/unicore32/configs/unicore32_defconfig +++ b/arch/unicore32/configs/defconfig diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index ec64834b1c6a..3f0903bd98e9 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -14,6 +14,10 @@ #include <asm/cacheflush.h> #include <asm/tlbflush.h> +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +#define __HAVE_ARCH_PTE_ALLOC_ONE +#include <asm-generic/pgalloc.h> + #define check_pgt_cache() do { } while (0) #define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_PRESENT) @@ -25,17 +29,14 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd); #define pgd_alloc(mm) get_pgd_slow(mm) #define pgd_free(mm, pgd) free_pgd_slow(mm, pgd) -#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) - /* * Allocate one PTE table. */ static inline pte_t * pte_alloc_one_kernel(struct mm_struct *mm) { - pte_t *pte; + pte_t *pte = __pte_alloc_one_kernel(mm); - pte = (pte_t *)__get_free_page(PGALLOC_GFP); if (pte) clean_dcache_area(pte, PTRS_PER_PTE * sizeof(pte_t)); @@ -47,35 +48,14 @@ pte_alloc_one(struct mm_struct *mm) { struct page *pte; - pte = alloc_pages(PGALLOC_GFP, 0); + pte = __pte_alloc_one(mm, GFP_PGTABLE_USER); if (!pte) return NULL; - if (!PageHighMem(pte)) { - void *page = page_address(pte); - clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t)); - } - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - } - + if (!PageHighMem(pte)) + clean_pte_table(page_address(pte)); return pte; } -/* - * Free one PTE table. - */ -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - if (pte) - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval) { set_pmd(pmdp, __pmd(pmdval)); diff --git a/arch/unicore32/include/mach/regs-gpio.h b/arch/unicore32/include/mach/regs-gpio.h index 806350e1ccb6..5fc701ee33e3 100644 --- a/arch/unicore32/include/mach/regs-gpio.h +++ b/arch/unicore32/include/mach/regs-gpio.h @@ -32,7 +32,7 @@ */ #define GPIO_GEDR (PKUNITY_GPIO_BASE + 0x0018) /* - * Sepcial Voltage Detect Reg GPIO_GPIR. + * Special Voltage Detect Reg GPIO_GPIR. 
*/ #define GPIO_GPIR (PKUNITY_GPIO_BASE + 0x0020) diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c index e62f82bd1339..3946182a835d 100644 --- a/arch/unicore32/kernel/signal.c +++ b/arch/unicore32/kernel/signal.c @@ -126,7 +126,7 @@ asmlinkage int __sys_rt_sigreturn(struct pt_regs *regs) return regs->UCreg_00; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -383,7 +383,7 @@ static void do_signal(struct pt_regs *regs, int syscall) regs->UCreg_pc = KERN_RESTART_CODE; } else { regs->UCreg_sp += 4; - force_sigsegv(0, current); + force_sigsegv(0); } } if (regs->UCreg_00 == -ERESTARTNOHAND || diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 1c1f0ce20e19..e24f67283864 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -245,7 +245,7 @@ void uc32_notify_die(const char *str, struct pt_regs *regs, current->thread.error_code = err; current->thread.trap_no = trap; - force_sig_fault(sig, code, addr, current); + force_sig_fault(sig, code, addr); } else die(str, regs, err); } diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 33e0d8a267e8..76342de9cf8c 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -113,14 +113,15 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, * Something tried to access memory that isn't in our memory map.. * User mode accesses just cause a SIGSEGV */ -static void __do_user_fault(struct task_struct *tsk, unsigned long addr, - unsigned int fsr, unsigned int sig, int code, - struct pt_regs *regs) +static void __do_user_fault(unsigned long addr, unsigned int fsr, + unsigned int sig, int code, struct pt_regs *regs) { + struct task_struct *tsk = current; + tsk->thread.address = addr; tsk->thread.error_code = fsr; tsk->thread.trap_no = 14; - force_sig_fault(sig, code, (void __user *)addr, tsk); + force_sig_fault(sig, code, (void __user *)addr); } void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) @@ -133,7 +134,7 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * have no context to handle this fault with. */ if (user_mode(regs)) - __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs); + __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs); else __do_kernel_fault(mm, addr, fsr, regs); } @@ -307,7 +308,7 @@ retry: code = fault == VM_FAULT_BADACCESS ? 
SEGV_ACCERR : SEGV_MAPERR; } - __do_user_fault(tsk, addr, fsr, sig, code, regs); + __do_user_fault(addr, fsr, sig, code, regs); return 0; no_context: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2bbbd4d1ba31..d0bbca65e4a4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -17,6 +17,7 @@ config X86_32 select HAVE_DEBUG_STACKOVERFLOW select MODULES_USE_ELF_REL select OLD_SIGACTION + select GENERIC_VDSO_32 config X86_64 def_bool y @@ -121,6 +122,8 @@ config X86 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY + select GUP_GET_PTE_LOW_HIGH if X86_PAE select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI @@ -156,6 +159,7 @@ config X86 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_EISA select HAVE_EXIT_THREAD + select HAVE_FAST_GUP select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER @@ -202,6 +206,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER + select HAVE_GENERIC_VDSO select HOTPLUG_SMT if SMP select IRQ_FORCED_THREADING select NEED_SG_DMA_LENGTH @@ -217,6 +222,7 @@ config X86 select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select X86_FEATURE_NAMES if PROC_FS + select PROC_PID_ARCH_STATUS if PROC_FS config INSTRUCTION_DECODER def_bool y @@ -395,7 +401,7 @@ config SMP Y to "Enhanced Real Time Clock Support", below. The "Advanced Power Management" code will be disabled if you say Y here. - See also <file:Documentation/x86/i386/IO-APIC.txt>, + See also <file:Documentation/x86/i386/IO-APIC.rst>, <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at <http://www.tldp.org/docs.html#howto>. @@ -781,6 +787,9 @@ config PARAVIRT_SPINLOCKS If you are unsure how to answer this question, answer Y. +config X86_HV_CALLBACK_VECTOR + def_bool n + source "arch/x86/xen/Kconfig" config KVM_GUEST @@ -832,6 +841,17 @@ config JAILHOUSE_GUEST cell. You can leave this option disabled if you only want to start Jailhouse and run Linux afterwards in the root cell. +config ACRN_GUEST + bool "ACRN Guest support" + depends on X86_64 + select X86_HV_CALLBACK_VECTOR + help + This option allows to run Linux as guest in the ACRN hypervisor. ACRN is + a flexible, lightweight reference open-source hypervisor, built with + real-time and safety-criticality in mind. It is built for embedded + IOT with small footprint and real-time features. More details can be + found in https://projectacrn.org/. + endif #HYPERVISOR_GUEST source "arch/x86/Kconfig.cpu" @@ -1290,7 +1310,7 @@ config MICROCODE the Linux kernel. The preferred method to load microcode from a detached initrd is described - in Documentation/x86/microcode.txt. For that you need to enable + in Documentation/x86/microcode.rst. For that you need to enable CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the initrd for microcode blobs. @@ -1329,7 +1349,7 @@ config MICROCODE_OLD_INTERFACE It is inadequate because it runs too late to be able to properly load microcode on a machine and it needs special tools. 
Instead, you should've switched to the early loading method with the initrd or - builtin microcode by now: Documentation/x86/microcode.txt + builtin microcode by now: Documentation/x86/microcode.rst config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" @@ -1478,7 +1498,7 @@ config X86_5LEVEL A kernel with the option enabled can be booted on machines that support 4- or 5-level paging. - See Documentation/x86/x86_64/5level-paging.txt for more + See Documentation/x86/x86_64/5level-paging.rst for more information. Say N if unsure. @@ -1626,7 +1646,7 @@ config ARCH_MEMORY_PROBE depends on X86_64 && MEMORY_HOTPLUG help This option enables a sysfs memory/probe interface for testing. - See Documentation/memory-hotplug.txt for more information. + See Documentation/admin-guide/mm/memory-hotplug.rst for more information. If you are unsure how to answer this question, answer N. config ARCH_PROC_KCORE_TEXT @@ -1783,7 +1803,7 @@ config MTRR You can safely say Y even if your machine doesn't have MTRRs, you'll just add about 9 KB to your kernel. - See <file:Documentation/x86/mtrr.txt> for more information. + See <file:Documentation/x86/mtrr.rst> for more information. config MTRR_SANITIZER def_bool y @@ -1895,7 +1915,7 @@ config X86_INTEL_MPX process and adds some branches to paths used during exec() and munmap(). - For details, see Documentation/x86/intel_mpx.txt + For details, see Documentation/x86/intel_mpx.rst If unsure, say N. @@ -1911,7 +1931,7 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS page-based protections, but without requiring modification of the page tables when an application changes protection domains. - For details, see Documentation/x86/protection-keys.txt + For details, see Documentation/core-api/protection-keys.rst If unsure, say y. @@ -2037,7 +2057,7 @@ config CRASH_DUMP to a memory address not used by the main kernel or BIOS using PHYSICAL_START, or it must be built as a relocatable image (CONFIG_RELOCATABLE=y). - For more details see Documentation/kdump/kdump.txt + For more details see Documentation/kdump/kdump.rst config KEXEC_JUMP bool "kexec jump" @@ -2074,7 +2094,7 @@ config PHYSICAL_START the reserved region. In other words, it can be set based on the "X" value as specified in the "crashkernel=YM@XM" command line boot parameter passed to the panic-ed - kernel. Please take a look at Documentation/kdump/kdump.txt + kernel. Please take a look at Documentation/kdump/kdump.rst for more details about crash dumps. Usage of bzImage for capturing the crash dump is recommended as @@ -2285,7 +2305,7 @@ config COMPAT_VDSO choice prompt "vsyscall table for legacy applications" depends on X86_64 - default LEGACY_VSYSCALL_EMULATE + default LEGACY_VSYSCALL_XONLY help Legacy user code that does not know how to find the vDSO expects to be able to issue three syscalls by calling fixed addresses in @@ -2293,23 +2313,38 @@ choice it can be used to assist security vulnerability exploitation. This setting can be changed at boot time via the kernel command - line parameter vsyscall=[emulate|none]. + line parameter vsyscall=[emulate|xonly|none]. On a system with recent enough glibc (2.14 or newer) and no static binaries, you can say None without a performance penalty to improve security. - If unsure, select "Emulate". + If unsure, select "Emulate execution only". config LEGACY_VSYSCALL_EMULATE - bool "Emulate" + bool "Full emulation" + help + The kernel traps and emulates calls into the fixed vsyscall + address mapping. 
This makes the mapping non-executable, but + it still contains readable known contents, which could be + used in certain rare security vulnerability exploits. This + configuration is recommended when using legacy userspace + that still uses vsyscalls along with legacy binary + instrumentation tools that require code to be readable. + + An example of this type of legacy userspace is running + Pin on an old binary that still uses vsyscalls. + + config LEGACY_VSYSCALL_XONLY + bool "Emulate execution only" help - The kernel traps and emulates calls into the fixed - vsyscall address mapping. This makes the mapping - non-executable, but it still contains known contents, - which could be used in certain rare security vulnerability - exploits. This configuration is recommended when userspace - still uses the vsyscall area. + The kernel traps and emulates calls into the fixed vsyscall + address mapping and does not allow reads. This + configuration is recommended when userspace might use the + legacy vsyscall area but support for legacy binary + instrumentation of legacy code is not needed. It mitigates + certain uses of the vsyscall area as an ASLR-bypassing + buffer. config LEGACY_VSYSCALL_NONE bool "None" @@ -2698,6 +2733,7 @@ config OLPC select OF select OF_PROMTREE select IRQ_DOMAIN + select OLPC_EC ---help--- Add support for detecting the unique features of the OLPC XO hardware. @@ -2873,9 +2909,6 @@ config HAVE_ATOMIC_IOMAP config X86_DEV_DMA_OPS bool -config HAVE_GENERIC_GUP - def_bool y - source "drivers/firmware/Kconfig" source "arch/x86/kvm/Kconfig" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 6adce15268bd..8e29c991ba3e 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -480,3 +480,16 @@ config CPU_SUP_UMC_32 CPU might render the kernel unbootable. If unsure, say N. + +config CPU_SUP_ZHAOXIN + default y + bool "Support Zhaoxin processors" if PROCESSOR_SELECT + help + This enables detection, tunings and quirks for Zhaoxin processors + + You need this enabled if you want your kernel to run on a + Zhaoxin CPU. Disabling this option on other types of CPUs + makes the kernel a tiny bit smaller. Disabling it on a Zhaoxin + CPU might render the kernel unbootable. + + If unsure, say N. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index f730680dc818..71c92db47c41 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -156,7 +156,7 @@ config IOMMU_DEBUG code. When you use it make sure you have a big enough IOMMU/AGP aperture. Most of the options enabled by this can be set more finegrained using the iommu= command line - options. See Documentation/x86/x86_64/boot-options.txt for more + options. See Documentation/x86/x86_64/boot-options.rst for more details. config IOMMU_LEAK @@ -179,26 +179,6 @@ config X86_DECODER_SELFTEST decoder code. If unsure, say "N". 
-# -# IO delay types: -# - -config IO_DELAY_TYPE_0X80 - int - default "0" - -config IO_DELAY_TYPE_0XED - int - default "1" - -config IO_DELAY_TYPE_UDELAY - int - default "2" - -config IO_DELAY_TYPE_NONE - int - default "3" - choice prompt "IO delay type" default IO_DELAY_0X80 @@ -229,30 +209,6 @@ config IO_DELAY_NONE endchoice -if IO_DELAY_0X80 -config DEFAULT_IO_DELAY_TYPE - int - default IO_DELAY_TYPE_0X80 -endif - -if IO_DELAY_0XED -config DEFAULT_IO_DELAY_TYPE - int - default IO_DELAY_TYPE_0XED -endif - -if IO_DELAY_UDELAY -config DEFAULT_IO_DELAY_TYPE - int - default IO_DELAY_TYPE_UDELAY -endif - -if IO_DELAY_NONE -config DEFAULT_IO_DELAY_TYPE - int - default IO_DELAY_TYPE_NONE -endif - config DEBUG_BOOT_PARAMS bool "Debug boot parameters" depends on DEBUG_KERNEL diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index ad84239e595e..15255f388a85 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -44,17 +44,109 @@ static acpi_physical_address get_acpi_rsdp(void) return addr; } -/* Search EFI system tables for RSDP. */ -static acpi_physical_address efi_get_rsdp_addr(void) +/* + * Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and + * ACPI_TABLE_GUID are found, take the former, which has more features. + */ +static acpi_physical_address +__efi_get_rsdp_addr(unsigned long config_tables, unsigned int nr_tables, + bool efi_64) { acpi_physical_address rsdp_addr = 0; #ifdef CONFIG_EFI - unsigned long systab, systab_tables, config_tables; + int i; + + /* Get EFI tables from systab. */ + for (i = 0; i < nr_tables; i++) { + acpi_physical_address table; + efi_guid_t guid; + + if (efi_64) { + efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables + i; + + guid = tbl->guid; + table = tbl->table; + + if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) { + debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n"); + return 0; + } + } else { + efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables + i; + + guid = tbl->guid; + table = tbl->table; + } + + if (!(efi_guidcmp(guid, ACPI_TABLE_GUID))) + rsdp_addr = table; + else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) + return table; + } +#endif + return rsdp_addr; +} + +/* EFI/kexec support is 64-bit only. */ +#ifdef CONFIG_X86_64 +static struct efi_setup_data *get_kexec_setup_data_addr(void) +{ + struct setup_data *data; + u64 pa_data; + + pa_data = boot_params->hdr.setup_data; + while (pa_data) { + data = (struct setup_data *)pa_data; + if (data->type == SETUP_EFI) + return (struct efi_setup_data *)(pa_data + sizeof(struct setup_data)); + + pa_data = data->next; + } + return NULL; +} + +static acpi_physical_address kexec_get_rsdp_addr(void) +{ + efi_system_table_64_t *systab; + struct efi_setup_data *esd; + struct efi_info *ei; + char *sig; + + esd = (struct efi_setup_data *)get_kexec_setup_data_addr(); + if (!esd) + return 0; + + if (!esd->tables) { + debug_putstr("Wrong kexec SETUP_EFI data.\n"); + return 0; + } + + ei = &boot_params->efi_info; + sig = (char *)&ei->efi_loader_signature; + if (strncmp(sig, EFI64_LOADER_SIGNATURE, 4)) { + debug_putstr("Wrong kexec EFI loader signature.\n"); + return 0; + } + + /* Get systab from boot params. 
*/ + systab = (efi_system_table_64_t *) (ei->efi_systab | ((__u64)ei->efi_systab_hi << 32)); + if (!systab) + error("EFI system table not found in kexec boot_params."); + + return __efi_get_rsdp_addr((unsigned long)esd->tables, systab->nr_tables, true); +} +#else +static acpi_physical_address kexec_get_rsdp_addr(void) { return 0; } +#endif /* CONFIG_X86_64 */ + +static acpi_physical_address efi_get_rsdp_addr(void) +{ +#ifdef CONFIG_EFI + unsigned long systab, config_tables; unsigned int nr_tables; struct efi_info *ei; bool efi_64; - int size, i; char *sig; ei = &boot_params->efi_info; @@ -88,49 +180,20 @@ static acpi_physical_address efi_get_rsdp_addr(void) config_tables = stbl->tables; nr_tables = stbl->nr_tables; - size = sizeof(efi_config_table_64_t); } else { efi_system_table_32_t *stbl = (efi_system_table_32_t *)systab; config_tables = stbl->tables; nr_tables = stbl->nr_tables; - size = sizeof(efi_config_table_32_t); } if (!config_tables) error("EFI config tables not found."); - /* Get EFI tables from systab. */ - for (i = 0; i < nr_tables; i++) { - acpi_physical_address table; - efi_guid_t guid; - - config_tables += size; - - if (efi_64) { - efi_config_table_64_t *tbl = (efi_config_table_64_t *)config_tables; - - guid = tbl->guid; - table = tbl->table; - - if (!IS_ENABLED(CONFIG_X86_64) && table >> 32) { - debug_putstr("Error getting RSDP address: EFI config table located above 4GB.\n"); - return 0; - } - } else { - efi_config_table_32_t *tbl = (efi_config_table_32_t *)config_tables; - - guid = tbl->guid; - table = tbl->table; - } - - if (!(efi_guidcmp(guid, ACPI_TABLE_GUID))) - rsdp_addr = table; - else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) - return table; - } + return __efi_get_rsdp_addr(config_tables, nr_tables, efi_64); +#else + return 0; #endif - return rsdp_addr; } static u8 compute_checksum(u8 *buffer, u32 length) @@ -220,6 +283,14 @@ acpi_physical_address get_rsdp_addr(void) if (!pa) pa = boot_params->acpi_rsdp_addr; + /* + * Try to get EFI data from setup_data. This can happen when we're a + * kexec'ed kernel and kexec(1) has passed all the required EFI info to + * us. + */ + if (!pa) + pa = kexec_get_rsdp_addr(); + if (!pa) pa = efi_get_rsdp_addr(); diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fafb75c6c592..6233ae35d0d9 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -659,6 +659,7 @@ no_longmode: gdt64: .word gdt_end - gdt .quad 0 + .balign 8 gdt: .word gdt_end - gdt .long gdt diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 5a237e8dbf8d..24e65a0f756d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -351,9 +351,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, /* Clear flags intended for solely in-kernel use. */ boot_params->hdr.loadflags &= ~KASLR_FLAG; - /* Save RSDP address for later use. */ - /* boot_params->acpi_rsdp_addr = get_rsdp_addr(); */ - sanitize_boot_params(boot_params); if (boot_params->screen_info.orig_video_mode == 7) { @@ -368,6 +365,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, cols = boot_params->screen_info.orig_video_cols; console_init(); + + /* + * Save RSDP address for later use. Have this after console_init() + * so that early debugging output from the RSDP parsing code can be + * collected. 
+ */ + boot_params->acpi_rsdp_addr = get_rsdp_addr(); + debug_putstr("early console in extract_kernel\n"); free_mem_ptr = heap; /* Heap */ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 850b8762e889..2c11c0f45d49 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -313,7 +313,7 @@ start_sys_seg: .word SYSSEG # obsolete and meaningless, but just type_of_loader: .byte 0 # 0 means ancient bootloader, newer # bootloaders know to change this. - # See Documentation/x86/boot.txt for + # See Documentation/x86/boot.rst for # assigned ids # flags, unused bits must be zero (RFU) bit within loadflags @@ -419,7 +419,17 @@ xloadflags: # define XLF4 0 #endif - .word XLF0 | XLF1 | XLF23 | XLF4 +#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_5LEVEL +#define XLF56 (XLF_5LEVEL|XLF_5LEVEL_ENABLED) +#else +#define XLF56 XLF_5LEVEL +#endif +#else +#define XLF56 0 +#endif + + .word XLF0 | XLF1 | XLF23 | XLF4 | XLF56 cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, #added with boot protocol diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 2b2481acc661..59ce9ed58430 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -130,7 +130,6 @@ CONFIG_CFG80211=y CONFIG_MAC80211=y CONFIG_MAC80211_LEDS=y CONFIG_RFKILL=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEBUG_DEVRES=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index e8829abf063a..d0a5ffeae8df 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -129,7 +129,6 @@ CONFIG_CFG80211=y CONFIG_MAC80211=y CONFIG_MAC80211_LEDS=y CONFIG_RFKILL=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEBUG_DEVRES=y diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index e9b866e87d48..73c0ccb009a0 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -371,20 +371,6 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) } } -static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - aesni_enc(ctx, dst, src); -} - -static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - - aesni_dec(ctx, dst, src); -} - static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int len) { @@ -920,7 +906,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) } #endif -static struct crypto_alg aesni_algs[] = { { +static struct crypto_alg aesni_cipher_alg = { .cra_name = "aes", .cra_driver_name = "aes-aesni", .cra_priority = 300, @@ -937,24 +923,7 @@ static struct crypto_alg aesni_algs[] = { { .cia_decrypt = aes_decrypt } } -}, { - .cra_name = "__aes", - .cra_driver_name = "__aes-aesni", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = CRYPTO_AES_CTX_SIZE, - .cra_module = THIS_MODULE, - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, - .cia_encrypt = __aes_encrypt, - .cia_decrypt = __aes_decrypt - } - } -} }; +}; static struct skcipher_alg aesni_skciphers[] = { { @@ -1150,7 +1119,7 @@ static int __init aesni_init(void) #endif #endif - err = 
crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); + err = crypto_register_alg(&aesni_cipher_alg); if (err) return err; @@ -1158,7 +1127,7 @@ static int __init aesni_init(void) ARRAY_SIZE(aesni_skciphers), aesni_simd_skciphers); if (err) - goto unregister_algs; + goto unregister_cipher; err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads), aesni_simd_aeads); @@ -1170,8 +1139,8 @@ static int __init aesni_init(void) unregister_skciphers: simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), aesni_simd_skciphers); -unregister_algs: - crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); +unregister_cipher: + crypto_unregister_alg(&aesni_cipher_alg); return err; } @@ -1181,7 +1150,7 @@ static void __exit aesni_exit(void) aesni_simd_aeads); simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), aesni_simd_skciphers); - crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); + crypto_unregister_alg(&aesni_cipher_alg); } late_initcall(aesni_init); diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 1ce0019c059c..388f95a4ec24 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -124,7 +124,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src, } static int chacha_simd_stream_xor(struct skcipher_walk *walk, - struct chacha_ctx *ctx, u8 *iv) + const struct chacha_ctx *ctx, const u8 *iv) { u32 *state, state_buf[16 + 2] __aligned(8); int next_yield = 4096; /* bytes until next FPU yield */ diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index efb0d1b1f15f..9f1f9e3b8230 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -172,21 +172,6 @@ For 32-bit we have the following conventions - kernel is built with .endif .endm -/* - * This is a sneaky trick to help the unwinder find pt_regs on the stack. The - * frame pointer is replaced with an encoded pointer to pt_regs. The encoding - * is just setting the LSB, which makes it an invalid stack address and is also - * a signal to the unwinder that it's a pt_regs pointer in disguise. - * - * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts - * the original rbp. 
- */ -.macro ENCODE_FRAME_POINTER ptregs_offset=0 -#ifdef CONFIG_FRAME_POINTER - leaq 1+\ptregs_offset(%rsp), %rbp -#endif -.endm - #ifdef CONFIG_PAGE_TABLE_ISOLATION /* diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 2418804e66b4..536b574b6161 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -72,23 +72,18 @@ static long syscall_trace_enter(struct pt_regs *regs) struct thread_info *ti = current_thread_info(); unsigned long ret = 0; - bool emulated = false; u32 work; if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags); - if (unlikely(work & _TIF_SYSCALL_EMU)) - emulated = true; - - if ((emulated || (work & _TIF_SYSCALL_TRACE)) && - tracehook_report_syscall_entry(regs)) - return -1L; - - if (emulated) - return -1L; + if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) { + ret = tracehook_report_syscall_entry(regs); + if (ret || (work & _TIF_SYSCALL_EMU)) + return -1L; + } #ifdef CONFIG_SECCOMP /* diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 7b23431be5cb..90b473297299 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -67,7 +67,6 @@ # define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF #else # define preempt_stop(clobbers) -# define resume_kernel restore_all_kernel #endif .macro TRACE_IRQS_IRET @@ -203,9 +202,102 @@ .Lend_\@: .endm +#define CS_FROM_ENTRY_STACK (1 << 31) +#define CS_FROM_USER_CR3 (1 << 30) +#define CS_FROM_KERNEL (1 << 29) + +.macro FIXUP_FRAME + /* + * The high bits of the CS dword (__csh) are used for CS_FROM_*. + * Clear them in case hardware didn't do this for us. + */ + andl $0x0000ffff, 3*4(%esp) + +#ifdef CONFIG_VM86 + testl $X86_EFLAGS_VM, 4*4(%esp) + jnz .Lfrom_usermode_no_fixup_\@ +#endif + testl $SEGMENT_RPL_MASK, 3*4(%esp) + jnz .Lfrom_usermode_no_fixup_\@ + + orl $CS_FROM_KERNEL, 3*4(%esp) + + /* + * When we're here from kernel mode, the (exception) stack looks like: + * + * 5*4(%esp) - <previous context> + * 4*4(%esp) - flags + * 3*4(%esp) - cs + * 2*4(%esp) - ip + * 1*4(%esp) - orig_eax + * 0*4(%esp) - gs / function + * + * Let's build a 5 entry IRET frame after that, such that struct pt_regs + * is complete and in particular regs->sp is correct. This gives us + * the original 5 entries as gap: + * + * 12*4(%esp) - <previous context> + * 11*4(%esp) - gap / flags + * 10*4(%esp) - gap / cs + * 9*4(%esp) - gap / ip + * 8*4(%esp) - gap / orig_eax + * 7*4(%esp) - gap / gs / function + * 6*4(%esp) - ss + * 5*4(%esp) - sp + * 4*4(%esp) - flags + * 3*4(%esp) - cs + * 2*4(%esp) - ip + * 1*4(%esp) - orig_eax + * 0*4(%esp) - gs / function + */ + + pushl %ss # ss + pushl %esp # sp (points at ss) + addl $6*4, (%esp) # point sp back at the previous context + pushl 6*4(%esp) # flags + pushl 6*4(%esp) # cs + pushl 6*4(%esp) # ip + pushl 6*4(%esp) # orig_eax + pushl 6*4(%esp) # gs / function +.Lfrom_usermode_no_fixup_\@: +.endm + +.macro IRET_FRAME + testl $CS_FROM_KERNEL, 1*4(%esp) + jz .Lfinished_frame_\@ + + /* + * Reconstruct the 3 entry IRET frame right after the (modified) + * regs->sp without lowering %esp in between, such that an NMI in the + * middle doesn't scribble our stack.
+ */ + pushl %eax + pushl %ecx + movl 5*4(%esp), %eax # (modified) regs->sp + + movl 4*4(%esp), %ecx # flags + movl %ecx, -4(%eax) + + movl 3*4(%esp), %ecx # cs + andl $0x0000ffff, %ecx + movl %ecx, -8(%eax) + + movl 2*4(%esp), %ecx # ip + movl %ecx, -12(%eax) + + movl 1*4(%esp), %ecx # eax + movl %ecx, -16(%eax) + + popl %ecx + lea -16(%eax), %esp + popl %eax +.Lfinished_frame_\@: +.endm + .macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 cld PUSH_GS + FIXUP_FRAME pushl %fs pushl %es pushl %ds @@ -247,22 +339,6 @@ .Lend_\@: .endm -/* - * This is a sneaky trick to help the unwinder find pt_regs on the stack. The - * frame pointer is replaced with an encoded pointer to pt_regs. The encoding - * is just clearing the MSB, which makes it an invalid stack address and is also - * a signal to the unwinder that it's a pt_regs pointer in disguise. - * - * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the - * original rbp. - */ -.macro ENCODE_FRAME_POINTER -#ifdef CONFIG_FRAME_POINTER - mov %esp, %ebp - andl $0x7fffffff, %ebp -#endif -.endm - .macro RESTORE_INT_REGS popl %ebx popl %ecx @@ -375,9 +451,6 @@ * switch to it before we do any copying. */ -#define CS_FROM_ENTRY_STACK (1 << 31) -#define CS_FROM_USER_CR3 (1 << 30) - .macro SWITCH_TO_KERNEL_STACK ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV @@ -391,13 +464,6 @@ * that register for the time this macro runs */ - /* - * The high bits of the CS dword (__csh) are used for - * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case - * hardware didn't do this for us. - */ - andl $(0x0000ffff), PT_CS(%esp) - /* Are we on the entry stack? Bail out if not! */ movl PER_CPU_VAR(cpu_entry_area), %ecx addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx @@ -755,7 +821,7 @@ ret_from_intr: andl $SEGMENT_RPL_MASK, %eax #endif cmpl $USER_RPL, %eax - jb resume_kernel # not returning to v8086 or userspace + jb restore_all_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) DISABLE_INTERRUPTS(CLBR_ANY) @@ -765,18 +831,6 @@ ENTRY(resume_userspace) jmp restore_all END(ret_from_exception) -#ifdef CONFIG_PREEMPT -ENTRY(resume_kernel) - DISABLE_INTERRUPTS(CLBR_ANY) - cmpl $0, PER_CPU_VAR(__preempt_count) - jnz restore_all_kernel - testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? - jz restore_all_kernel - call preempt_schedule_irq - jmp restore_all_kernel -END(resume_kernel) -#endif - GLOBAL(__begin_SYSENTER_singlestep_region) /* * All code from here through __end_SYSENTER_singlestep_region is subject @@ -1019,6 +1073,7 @@ restore_all: /* Restore user state */ RESTORE_REGS pop=4 # skip orig_eax/error_code .Lirq_return: + IRET_FRAME /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * when returning from IPI handler and when returning from @@ -1027,6 +1082,15 @@ restore_all: INTERRUPT_RETURN restore_all_kernel: +#ifdef CONFIG_PREEMPT + DISABLE_INTERRUPTS(CLBR_ANY) + cmpl $0, PER_CPU_VAR(__preempt_count) + jnz .Lno_preempt + testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? 
+ jz .Lno_preempt + call preempt_schedule_irq +.Lno_preempt: +#endif TRACE_IRQS_IRET PARANOID_EXIT_TO_KERNEL_MODE BUG_IF_WRONG_CR3 @@ -1104,6 +1168,30 @@ ENTRY(irq_entries_start) .endr END(irq_entries_start) +#ifdef CONFIG_X86_LOCAL_APIC + .align 8 +ENTRY(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) + pushl $(~vector+0x80) /* Note: always in signed byte range */ + vector=vector+1 + jmp common_spurious + .align 8 + .endr +END(spurious_entries_start) + +common_spurious: + ASM_CLAC + addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */ + SAVE_ALL switch_stacks=1 + ENCODE_FRAME_POINTER + TRACE_IRQS_OFF + movl %esp, %eax + call smp_spurious_interrupt + jmp ret_from_intr +ENDPROC(common_spurious) +#endif + /* * the CPU automatically disables interrupts when executing an IRQ vector, * so IRQ-flags tracing has to follow that: @@ -1360,6 +1448,7 @@ END(page_fault) common_exception: /* the function address is in %gs's slot on the stack */ + FIXUP_FRAME pushl %fs pushl %es pushl %ds diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 11aa3b2afa4d..0ea4831a72a4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -8,7 +8,7 @@ * * entry.S contains the system-call and fault low-level handling routines. * - * Some of this is documented in Documentation/x86/entry_64.txt + * Some of this is documented in Documentation/x86/entry_64.rst * * A note on terminology: * - iret frame: Architecture defined interrupt frame from SS to RIP @@ -375,6 +375,18 @@ ENTRY(irq_entries_start) .endr END(irq_entries_start) + .align 8 +ENTRY(spurious_entries_start) + vector=FIRST_SYSTEM_VECTOR + .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR) + UNWIND_HINT_IRET_REGS + pushq $(~vector+0x80) /* Note: always in signed byte range */ + jmp common_spurious + .align 8 + vector=vector+1 + .endr +END(spurious_entries_start) + .macro DEBUG_ENTRY_ASSERT_IRQS_OFF #ifdef CONFIG_DEBUG_ENTRY pushq %rax @@ -571,10 +583,20 @@ _ASM_NOKPROBE(interrupt_entry) /* Interrupt entry/exit. */ - /* - * The interrupt stubs push (~vector+0x80) onto the stack and - * then jump to common_interrupt. - */ +/* + * The interrupt stubs push (~vector+0x80) onto the stack and + * then jump to common_spurious/interrupt. + */ +common_spurious: + addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call smp_spurious_interrupt /* rdi points to pt_regs */ + jmp ret_from_intr +END(common_spurious) +_ASM_NOKPROBE(common_spurious) + +/* common_interrupt is a hotpath. Align it */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ @@ -1142,6 +1164,11 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \ hv_stimer0_callback_vector hv_stimer0_vector_handler #endif /* CONFIG_HYPERV */ +#if IS_ENABLED(CONFIG_ACRN_GUEST) +apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ + acrn_hv_callback_vector acrn_hv_vector_handler +#endif + idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET idtentry int3 do_int3 has_error_code=0 create_gap=1 idtentry stack_segment do_stack_segment has_error_code=1 @@ -1670,11 +1697,17 @@ nmi_restore: iretq END(nmi) +#ifndef CONFIG_IA32_EMULATION +/* + * This handles SYSCALL from 32-bit code. There is no way to program + * MSRs to fully disable 32-bit SYSCALL. 
+ */ ENTRY(ignore_sysret) UNWIND_HINT_EMPTY mov $-ENOSYS, %eax sysret END(ignore_sysret) +#endif ENTRY(rewind_stack_do_exit) UNWIND_HINT_FUNC diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ad968b7bac72..c00019abd076 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -438,3 +438,5 @@ 431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig 432 i386 fsmount sys_fsmount __ia32_sys_fsmount 433 i386 fspick sys_fspick __ia32_sys_fspick +434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open +435 i386 clone3 sys_clone3 __ia32_sys_clone3 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index b4e6f9e6204a..c29976eca4a8 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -355,6 +355,8 @@ 431 common fsconfig __x64_sys_fsconfig 432 common fsmount __x64_sys_fsmount 433 common fspick __x64_sys_fspick +434 common pidfd_open __x64_sys_pidfd_open +435 common clone3 __x64_sys_clone3/ptregs # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 42fe42e82baf..34773395139a 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -3,6 +3,12 @@ # Building vDSO images for x86. # +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| +ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE +include $(srctree)/lib/vdso/Makefile + KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n @@ -50,7 +56,7 @@ VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ -z max-page-size=4096 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE - $(call if_changed,vdso) + $(call if_changed,vdso_and_check) HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi hostprogs-y += vdso2c @@ -120,7 +126,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE - $(call if_changed,vdso) + $(call if_changed,vdso_and_check) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 @@ -159,7 +165,7 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/note.o \ $(obj)/vdso32/system_call.o \ $(obj)/vdso32/sigreturn.o - $(call if_changed,vdso) + $(call if_changed,vdso_and_check) # # The DSO images are built using a special linker script. @@ -175,6 +181,9 @@ VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \ -Bsymbolic GCOV_PROFILE := n +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_vdso); $(cmd_vdso_check) + # # Install the unstripped copies of vdso*.so. If our toolchain supports # build-id, install .build-id links as well. diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 4aed41f638bb..d9ff616bb0f6 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -1,251 +1,85 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright 2006 Andi Kleen, SUSE Labs. - * * Fast user context implementation of clock_gettime, gettimeofday, and time. * + * Copyright 2006 Andi Kleen, SUSE Labs. 
+ * Copyright 2019 ARM Limited + * * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * The code should have no internal unresolved relocations. - * Check with readelf after changing. */ - -#include <uapi/linux/time.h> -#include <asm/vgtod.h> -#include <asm/vvar.h> -#include <asm/unistd.h> -#include <asm/msr.h> -#include <asm/pvclock.h> -#include <asm/mshyperv.h> -#include <linux/math64.h> #include <linux/time.h> #include <linux/kernel.h> +#include <linux/types.h> -#define gtod (&VVAR(vsyscall_gtod_data)) +#include "../../../../lib/vdso/gettimeofday.c" -extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); -extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); +extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); extern time_t __vdso_time(time_t *t); -#ifdef CONFIG_PARAVIRT_CLOCK -extern u8 pvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifdef CONFIG_HYPERV_TSCPAGE -extern u8 hvclock_page[PAGE_SIZE] - __attribute__((visibility("hidden"))); -#endif - -#ifndef BUILD_VDSO32 - -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { - long ret; - asm ("syscall" : "=a" (ret), "=m" (*ts) : - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : - "rcx", "r11"); - return ret; + return __cvdso_gettimeofday(tv, tz); } -#else +int gettimeofday(struct __kernel_old_timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); -notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +time_t __vdso_time(time_t *t) { - long ret; - - asm ( - "mov %%ebx, %%edx \n" - "mov %[clock], %%ebx \n" - "call __kernel_vsyscall \n" - "mov %%edx, %%ebx \n" - : "=a" (ret), "=m" (*ts) - : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) - : "edx"); - return ret; + return __cvdso_time(t); } -#endif +time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); -#ifdef CONFIG_PARAVIRT_CLOCK -static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) -{ - return (const struct pvclock_vsyscall_time_info *)&pvclock_page; -} -static notrace u64 vread_pvclock(void) -{ - const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; - u32 version; - u64 ret; - - /* - * Note: The kernel and hypervisor must guarantee that cpu ID - * number maps 1:1 to per-CPU pvclock time info. - * - * Because the hypervisor is entirely unaware of guest userspace - * preemption, it cannot guarantee that per-CPU pvclock time - * info is updated if the underlying CPU changes or that that - * version is increased whenever underlying CPU changes. - * - * On KVM, we are guaranteed that pvti updates for any vCPU are - * atomic as seen by *all* vCPUs. This is an even stronger - * guarantee than we get with a normal seqlock. - * - * On Xen, we don't appear to have that guarantee, but Xen still - * supplies a valid seqlock using the version field. - * - * We only do pvclock vdso timing at all if - * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to - * mean that all vCPUs have matching pvti and that the TSC is - * synced, so we can just look at vCPU 0's pvti. 
- */ - - do { - version = pvclock_read_begin(pvti); - - if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) - return U64_MAX; - - ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); - } while (pvclock_read_retry(pvti, version)); - - return ret; -} -#endif -#ifdef CONFIG_HYPERV_TSCPAGE -static notrace u64 vread_hvclock(void) -{ - const struct ms_hyperv_tsc_page *tsc_pg = - (const struct ms_hyperv_tsc_page *)&hvclock_page; +#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64) +/* both 64-bit and x32 use these */ +extern int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +extern int __vdso_clock_getres(clockid_t clock, struct __kernel_timespec *res); - return hv_read_tsc_page(tsc_pg); -} -#endif - -notrace static inline u64 vgetcyc(int mode) +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) { - if (mode == VCLOCK_TSC) - return (u64)rdtsc_ordered(); - - /* - * For any memory-mapped vclock type, we need to make sure that gcc - * doesn't cleverly hoist a load before the mode check. Otherwise we - * might end up touching the memory-mapped page even if the vclock in - * question isn't enabled, which will segfault. Hence the barriers. - */ -#ifdef CONFIG_PARAVIRT_CLOCK - if (mode == VCLOCK_PVCLOCK) { - barrier(); - return vread_pvclock(); - } -#endif -#ifdef CONFIG_HYPERV_TSCPAGE - if (mode == VCLOCK_HVCLOCK) { - barrier(); - return vread_hvclock(); - } -#endif - return U64_MAX; + return __cvdso_clock_gettime(clock, ts); } -notrace static int do_hres(clockid_t clk, struct timespec *ts) -{ - struct vgtod_ts *base = >od->basetime[clk]; - u64 cycles, last, sec, ns; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - cycles = vgetcyc(gtod->vclock_mode); - ns = base->nsec; - last = gtod->cycle_last; - if (unlikely((s64)cycles < 0)) - return vdso_fallback_gettime(clk, ts); - if (cycles > last) - ns += (cycles - last) * gtod->mult; - ns >>= gtod->shift; - sec = base->sec; - } while (unlikely(gtod_read_retry(gtod, seq))); - - /* - * Do this outside the loop: a race inside the loop could result - * in __iter_div_u64_rem() being extremely slow. - */ - ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); - ts->tv_nsec = ns; - - return 0; -} +int clock_gettime(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); -notrace static void do_coarse(clockid_t clk, struct timespec *ts) +int __vdso_clock_getres(clockid_t clock, + struct __kernel_timespec *res) { - struct vgtod_ts *base = >od->basetime[clk]; - unsigned int seq; - - do { - seq = gtod_read_begin(gtod); - ts->tv_sec = base->sec; - ts->tv_nsec = base->nsec; - } while (unlikely(gtod_read_retry(gtod, seq))); + return __cvdso_clock_getres(clock, res); } +int clock_getres(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_getres"))); -notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) +#else +/* i386 only */ +extern int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts); +extern int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res); + +int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) { - unsigned int msk; - - /* Sort out negative (CPU/FD) and invalid clocks */ - if (unlikely((unsigned int) clock >= MAX_CLOCKS)) - return vdso_fallback_gettime(clock, ts); - - /* - * Convert the clockid to a bitmask and use it to check which - * clocks are handled in the VDSO directly. 
- */ - msk = 1U << clock; - if (likely(msk & VGTOD_HRES)) { - return do_hres(clock, ts); - } else if (msk & VGTOD_COARSE) { - do_coarse(clock, ts); - return 0; - } - return vdso_fallback_gettime(clock, ts); + return __cvdso_clock_gettime32(clock, ts); } -int clock_gettime(clockid_t, struct timespec *) +int clock_gettime(clockid_t, struct old_timespec32 *) __attribute__((weak, alias("__vdso_clock_gettime"))); -notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts) { - if (likely(tv != NULL)) { - struct timespec *ts = (struct timespec *) tv; - - do_hres(CLOCK_REALTIME, ts); - tv->tv_usec /= 1000; - } - if (unlikely(tz != NULL)) { - tz->tz_minuteswest = gtod->tz_minuteswest; - tz->tz_dsttime = gtod->tz_dsttime; - } - - return 0; + return __cvdso_clock_gettime(clock, ts); } -int gettimeofday(struct timeval *, struct timezone *) - __attribute__((weak, alias("__vdso_gettimeofday"))); -/* - * This will break when the xtime seconds get inaccurate, but that is - * unlikely - */ -notrace time_t __vdso_time(time_t *t) -{ - /* This is atomic on x86 so we don't need any locks. */ - time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec); +int clock_gettime64(clockid_t, struct __kernel_timespec *) + __attribute__((weak, alias("__vdso_clock_gettime64"))); - if (t) - *t = result; - return result; +int __vdso_clock_getres(clockid_t clock, struct old_timespec32 *res) +{ + return __cvdso_clock_getres_time32(clock, res); } -time_t time(time_t *t) - __attribute__((weak, alias("__vdso_time"))); + +int clock_getres(clockid_t, struct old_timespec32 *) + __attribute__((weak, alias("__vdso_clock_getres"))); +#endif diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S index d3a2dce4cfa9..36b644e16272 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso.lds.S @@ -25,6 +25,8 @@ VERSION { __vdso_getcpu; time; __vdso_time; + clock_getres; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vdso/vdso32/vdso32.lds.S b/arch/x86/entry/vdso/vdso32/vdso32.lds.S index 422764a81d32..c7720995ab1a 100644 --- a/arch/x86/entry/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/entry/vdso/vdso32/vdso32.lds.S @@ -26,6 +26,8 @@ VERSION __vdso_clock_gettime; __vdso_gettimeofday; __vdso_time; + __vdso_clock_getres; + __vdso_clock_gettime64; }; LINUX_2.5 { diff --git a/arch/x86/entry/vdso/vdsox32.lds.S b/arch/x86/entry/vdso/vdsox32.lds.S index 05cd1c5c4a15..16a8050a4fb6 100644 --- a/arch/x86/entry/vdso/vdsox32.lds.S +++ b/arch/x86/entry/vdso/vdsox32.lds.S @@ -21,6 +21,7 @@ VERSION { __vdso_gettimeofday; __vdso_getcpu; __vdso_time; + __vdso_clock_getres; local: *; }; } diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 8db1f594e8b1..349a61d8bf34 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -22,7 +22,7 @@ #include <asm/page.h> #include <asm/desc.h> #include <asm/cpufeature.h> -#include <asm/mshyperv.h> +#include <clocksource/hyperv_timer.h> #if defined(CONFIG_X86_64) unsigned int __read_mostly vdso64_enabled = 1; diff --git a/arch/x86/entry/vsyscall/Makefile b/arch/x86/entry/vsyscall/Makefile index 1ac4dd116c26..93c1b3e949a7 100644 --- a/arch/x86/entry/vsyscall/Makefile +++ b/arch/x86/entry/vsyscall/Makefile @@ -2,7 +2,5 @@ # # Makefile for the x86 low level vsyscall code # -obj-y := vsyscall_gtod.o - obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c 
b/arch/x86/entry/vsyscall/vsyscall_64.c index d9d81ad7a400..e7c596dea947 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,9 +42,11 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, NONE } vsyscall_mode = +static enum { EMULATE, XONLY, NONE } vsyscall_mode __ro_after_init = #ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; +#elif defined(CONFIG_LEGACY_VSYSCALL_XONLY) + XONLY; #else EMULATE; #endif @@ -54,6 +56,8 @@ static int __init vsyscall_setup(char *str) if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; + else if (!strcmp("xonly", str)) + vsyscall_mode = XONLY; else if (!strcmp("none", str)) vsyscall_mode = NONE; else @@ -106,14 +110,15 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) thread->cr2 = ptr; thread->trap_nr = X86_TRAP_PF; - force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr); return false; } else { return true; } } -bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) { struct task_struct *tsk; unsigned long caller; @@ -122,6 +127,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) long ret; unsigned long orig_dx; + /* Write faults or kernel-privilege faults never get fixed up. */ + if ((error_code & (X86_PF_WRITE | X86_PF_USER)) != X86_PF_USER) + return false; + + if (!(error_code & X86_PF_INSTR)) { + /* Failed vsyscall read */ + if (vsyscall_mode == EMULATE) + return false; + + /* + * User code tried and failed to read the vsyscall page. + */ + warn_bad_vsyscall(KERN_INFO, regs, "vsyscall read attempt denied -- look up the vsyscall kernel parameter if you need a workaround"); + return false; + } + /* * No point in checking CS -- the only way to get here is a user mode * trap to a high address, which means that we're in 64-bit user code. @@ -268,7 +289,7 @@ do_ret: return true; sigsegv: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return true; } @@ -284,7 +305,7 @@ static const char *gate_vma_name(struct vm_area_struct *vma) static const struct vm_operations_struct gate_vma_ops = { .name = gate_vma_name, }; -static struct vm_area_struct gate_vma = { +static struct vm_area_struct gate_vma __ro_after_init = { .vm_start = VSYSCALL_ADDR, .vm_end = VSYSCALL_ADDR + PAGE_SIZE, .vm_page_prot = PAGE_READONLY_EXEC, @@ -357,12 +378,20 @@ void __init map_vsyscall(void) extern char __vsyscall_page; unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); - if (vsyscall_mode != NONE) { + /* + * For full emulation, the page needs to exist for real. In + * execute-only mode, there is no PTE at all backing the vsyscall + * page. + */ + if (vsyscall_mode == EMULATE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } + if (vsyscall_mode == XONLY) + gate_vma.vm_flags = VM_EXEC; + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != (unsigned long)VSYSCALL_ADDR); } diff --git a/arch/x86/entry/vsyscall/vsyscall_gtod.c b/arch/x86/entry/vsyscall/vsyscall_gtod.c deleted file mode 100644 index cfcdba082feb..000000000000 --- a/arch/x86/entry/vsyscall/vsyscall_gtod.c +++ /dev/null @@ -1,83 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE - * Copyright 2003 Andi Kleen, SuSE Labs. 
- * - * Modified for x86 32 bit architecture by - * Stefani Seibold <stefani@seibold.net> - * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany - * - * Thanks to hpa@transmeta.com for some useful hint. - * Special thanks to Ingo Molnar for his early experience with - * a different vsyscall implementation for Linux/IA32 and for the name. - * - */ - -#include <linux/timekeeper_internal.h> -#include <asm/vgtod.h> -#include <asm/vvar.h> - -int vclocks_used __read_mostly; - -DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); - -void update_vsyscall_tz(void) -{ - vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest; - vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime; -} - -void update_vsyscall(struct timekeeper *tk) -{ - int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; - struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - struct vgtod_ts *base; - u64 nsec; - - /* Mark the new vclock used. */ - BUILD_BUG_ON(VCLOCK_MAX >= 32); - WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); - - gtod_write_begin(vdata); - - /* copy vsyscall data */ - vdata->vclock_mode = vclock_mode; - vdata->cycle_last = tk->tkr_mono.cycle_last; - vdata->mask = tk->tkr_mono.mask; - vdata->mult = tk->tkr_mono.mult; - vdata->shift = tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_REALTIME]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_TAI]; - base->sec = tk->xtime_sec + (s64)tk->tai_offset; - base->nsec = tk->tkr_mono.xtime_nsec; - - base = &vdata->basetime[CLOCK_MONOTONIC]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec; - nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); - while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { - nsec -= ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; - base->sec++; - } - base->nsec = nsec; - - base = &vdata->basetime[CLOCK_REALTIME_COARSE]; - base->sec = tk->xtime_sec; - base->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - - base = &vdata->basetime[CLOCK_MONOTONIC_COARSE]; - base->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; - nsec += tk->wall_to_monotonic.tv_nsec; - while (nsec >= NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - base->sec++; - } - base->nsec = nsec; - - gtod_write_end(vdata); -} diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 9cbfd34042d5..9e07f554333f 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += core.o +obj-y += core.o probe.o obj-y += amd/ obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 85e6984c560b..a6ea07f2aa84 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -202,15 +202,22 @@ static int amd_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; + if (event->cpu < 0) + return -EINVAL; + /* * SliceMask and ThreadMask need to be set for certain L3 events in * Family 17h. For other events, the two fields do not affect the count. 
*/ - if (l3_mask) - hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK); + if (l3_mask && is_llc_event(event)) { + int thread = 2 * (cpu_data(event->cpu).cpu_core_id % 4); - if (event->cpu < 0) - return -EINVAL; + if (smp_num_siblings > 1) + thread += cpu_data(event->cpu).apicid & 1; + + hwc->config |= (1ULL << (AMD64_L3_THREAD_SHIFT + thread) & + AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK; + } uncore = event_to_amd_uncore(event); if (!uncore) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index f315425d8468..81b005e4c7d9 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -561,14 +561,14 @@ int x86_pmu_hw_config(struct perf_event *event) } /* sample_regs_user never support XMM registers */ - if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS)) + if (unlikely(event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK)) return -EINVAL; /* * Besides the general purpose registers, XMM registers may * be collected in PEBS on some platforms, e.g. Icelake */ - if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) { - if (x86_pmu.pebs_no_xmm_regs) + if (unlikely(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK)) { + if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS)) return -EINVAL; if (!event->attr.precise_ip) @@ -1618,68 +1618,6 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = { .attrs = NULL, }; -/* - * Remove all undefined events (x86_pmu.event_map(id) == 0) - * out of events_attr attributes. - */ -static void __init filter_events(struct attribute **attrs) -{ - struct device_attribute *d; - struct perf_pmu_events_attr *pmu_attr; - int offset = 0; - int i, j; - - for (i = 0; attrs[i]; i++) { - d = (struct device_attribute *)attrs[i]; - pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); - /* str trumps id */ - if (pmu_attr->event_str) - continue; - if (x86_pmu.event_map(i + offset)) - continue; - - for (j = i; attrs[j]; j++) - attrs[j] = attrs[j + 1]; - - /* Check the shifted attr. */ - i--; - - /* - * event_map() is index based, the attrs array is organized - * by increasing event index. If we shift the events, then - * we need to compensate for the event_map(), otherwise - * we are looking up the wrong event in the map - */ - offset++; - } -} - -/* Merge two pointer arrays */ -__init struct attribute **merge_attr(struct attribute **a, struct attribute **b) -{ - struct attribute **new; - int j, i; - - for (j = 0; a && a[j]; j++) - ; - for (i = 0; b && b[i]; i++) - j++; - j++; - - new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL); - if (!new) - return NULL; - - j = 0; - for (i = 0; a && a[i]; i++) - new[j++] = a[i]; - for (i = 0; b && b[i]; i++) - new[j++] = b[i]; - new[j] = NULL; - - return new; -} - ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { struct perf_pmu_events_attr *pmu_attr = \ @@ -1744,9 +1682,24 @@ static struct attribute *events_attr[] = { NULL, }; +/* + * Remove all undefined events (x86_pmu.event_map(id) == 0) + * out of events_attr attributes. + */ +static umode_t +is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + /* str trumps id */ + return pmu_attr->event_str || x86_pmu.event_map(idx) ? 
attr->mode : 0; +} + static struct attribute_group x86_pmu_events_group __ro_after_init = { .name = "events", .attrs = events_attr, + .is_visible = is_visible, }; ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) @@ -1842,37 +1795,10 @@ static int __init init_hw_perf_events(void) x86_pmu_format_group.attrs = x86_pmu.format_attrs; - if (x86_pmu.caps_attrs) { - struct attribute **tmp; - - tmp = merge_attr(x86_pmu_caps_group.attrs, x86_pmu.caps_attrs); - if (!WARN_ON(!tmp)) - x86_pmu_caps_group.attrs = tmp; - } - - if (x86_pmu.event_attrs) - x86_pmu_events_group.attrs = x86_pmu.event_attrs; - if (!x86_pmu.events_sysfs_show) x86_pmu_events_group.attrs = &empty_attrs; - else - filter_events(x86_pmu_events_group.attrs); - - if (x86_pmu.cpu_events) { - struct attribute **tmp; - - tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events); - if (!WARN_ON(!tmp)) - x86_pmu_events_group.attrs = tmp; - } - - if (x86_pmu.attrs) { - struct attribute **tmp; - tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs); - if (!WARN_ON(!tmp)) - x86_pmu_attr_group.attrs = tmp; - } + pmu.attr_update = x86_pmu.attr_update; pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); @@ -2179,7 +2105,7 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) * For now, this can't happen because all callers hold mmap_sem * for write. If this changes, we'll need a different solution. */ - lockdep_assert_held_exclusive(&mm->mmap_sem); + lockdep_assert_held_write(&mm->mmap_sem); if (atomic_inc_return(&mm->context.perf_rdpmc_allowed) == 1) on_each_cpu_mask(mm_cpumask(mm), refresh_pce, NULL, 1); @@ -2402,13 +2328,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_hw_regs(regs)) { - if (perf_callchain_store(entry, regs->ip)) - return; + if (perf_callchain_store(entry, regs->ip)) + return; + + if (perf_hw_regs(regs)) unwind_start(&state, current, regs, NULL); - } else { + else unwind_start(&state, current, NULL, (void *)regs->sp); - } for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a5436cee20b1..9e911a96972b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -20,6 +20,7 @@ #include <asm/intel-family.h> #include <asm/apic.h> #include <asm/cpu_device_id.h> +#include <asm/hypervisor.h> #include "../perf_event.h" @@ -2160,12 +2161,10 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); cpuc->intel_cp_status &= ~(1ull << hwc->idx); - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) intel_pmu_disable_fixed(hwc); - return; - } - - x86_pmu_disable_event(event); + else + x86_pmu_disable_event(event); /* * Needs to be called after x86_pmu_disable_event, @@ -3897,8 +3896,6 @@ static __initconst const struct x86_pmu core_pmu = { .check_period = intel_pmu_check_period, }; -static struct attribute *intel_pmu_attrs[]; - static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -3930,8 +3927,6 @@ static __initconst const struct x86_pmu intel_pmu = { .format_attrs = intel_arch3_formats_attr, .events_sysfs_show = intel_event_sysfs_show, - .attrs = intel_pmu_attrs, - .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = 
intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, @@ -4055,6 +4050,13 @@ static bool check_msr(unsigned long msr, u64 mask) u64 val_old, val_new, val_tmp; /* + * Disable the check for real HW, so we don't + * mess with potentially enabled registers: + */ + if (hypervisor_is_type(X86_HYPER_NATIVE)) + return true; + + /* * Read the current value, change it and read it back to see if it * matches, this is needed to detect certain hardware emulators * (qemu/kvm) that don't trap on the MSR access and always return 0s. @@ -4274,13 +4276,6 @@ static struct attribute *icl_tsx_events_attrs[] = { NULL, }; -static __init struct attribute **get_icl_events_attrs(void) -{ - return boot_cpu_has(X86_FEATURE_RTM) ? - merge_attr(icl_events_attrs, icl_tsx_events_attrs) : - icl_events_attrs; -} - static ssize_t freeze_on_smi_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -4402,43 +4397,111 @@ static DEVICE_ATTR(allow_tsx_force_abort, 0644, static struct attribute *intel_pmu_attrs[] = { &dev_attr_freeze_on_smi.attr, - NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */ + &dev_attr_allow_tsx_force_abort.attr, NULL, }; -static __init struct attribute ** -get_events_attrs(struct attribute **base, - struct attribute **mem, - struct attribute **tsx) +static umode_t +tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) { - struct attribute **attrs = base; - struct attribute **old; + return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0; +} - if (mem && x86_pmu.pebs) - attrs = merge_attr(attrs, mem); +static umode_t +pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.pebs ? attr->mode : 0; +} - if (tsx && boot_cpu_has(X86_FEATURE_RTM)) { - old = attrs; - attrs = merge_attr(attrs, tsx); - if (old != base) - kfree(old); - } +static umode_t +lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.lbr_nr ? attr->mode : 0; +} - return attrs; +static umode_t +exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return x86_pmu.version >= 2 ? attr->mode : 0; } +static umode_t +default_is_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + if (attr == &dev_attr_allow_tsx_force_abort.attr) + return x86_pmu.flags & PMU_FL_TFA ?
attr->mode : 0; + + return attr->mode; +} + +static struct attribute_group group_events_td = { + .name = "events", +}; + +static struct attribute_group group_events_mem = { + .name = "events", + .is_visible = pebs_is_visible, +}; + +static struct attribute_group group_events_tsx = { + .name = "events", + .is_visible = tsx_is_visible, +}; + +static struct attribute_group group_caps_gen = { + .name = "caps", + .attrs = intel_pmu_caps_attrs, +}; + +static struct attribute_group group_caps_lbr = { + .name = "caps", + .attrs = lbr_attrs, + .is_visible = lbr_is_visible, +}; + +static struct attribute_group group_format_extra = { + .name = "format", + .is_visible = exra_is_visible, +}; + +static struct attribute_group group_format_extra_skl = { + .name = "format", + .is_visible = exra_is_visible, +}; + +static struct attribute_group group_default = { + .attrs = intel_pmu_attrs, + .is_visible = default_is_visible, +}; + +static const struct attribute_group *attr_update[] = { + &group_events_td, + &group_events_mem, + &group_events_tsx, + &group_caps_gen, + &group_caps_lbr, + &group_format_extra, + &group_format_extra_skl, + &group_default, + NULL, +}; + +static struct attribute *empty_attrs; + __init int intel_pmu_init(void) { - struct attribute **extra_attr = NULL; - struct attribute **mem_attr = NULL; - struct attribute **tsx_attr = NULL; - struct attribute **to_free = NULL; + struct attribute **extra_skl_attr = &empty_attrs; + struct attribute **extra_attr = &empty_attrs; + struct attribute **td_attr = &empty_attrs; + struct attribute **mem_attr = &empty_attrs; + struct attribute **tsx_attr = &empty_attrs; union cpuid10_edx edx; union cpuid10_eax eax; union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; struct extra_reg *er; + bool pmem = false; int version, i; char *name; @@ -4596,7 +4659,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.cpu_events = slm_events_attrs; + td_attr = slm_events_attrs; extra_attr = slm_format_attr; pr_cont("Silvermont events, "); name = "silvermont"; @@ -4624,7 +4687,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_prec_dist = true; x86_pmu.lbr_pt_coexist = true; x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.cpu_events = glm_events_attrs; + td_attr = glm_events_attrs; extra_attr = slm_format_attr; pr_cont("Goldmont events, "); name = "goldmont"; @@ -4651,7 +4714,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_PEBS_ALL; x86_pmu.get_event_constraints = glp_get_event_constraints; - x86_pmu.cpu_events = glm_events_attrs; + td_attr = glm_events_attrs; /* Goldmont Plus has 4-wide pipeline */ event_attr_td_total_slots_scale_glm.event_str = "4"; extra_attr = slm_format_attr; @@ -4740,7 +4803,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.cpu_events = snb_events_attrs; + td_attr = snb_events_attrs; mem_attr = snb_mem_events_attrs; /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ @@ -4781,7 +4844,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.cpu_events = snb_events_attrs; + td_attr = snb_events_attrs; mem_attr = snb_mem_events_attrs; /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */ @@ -4818,10 +4881,10 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = 
hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; + td_attr = hsw_events_attrs; mem_attr = hsw_mem_events_attrs; tsx_attr = hsw_tsx_events_attrs; pr_cont("Haswell events, "); @@ -4860,10 +4923,10 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; x86_pmu.limit_period = bdw_limit_period; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; + td_attr = hsw_events_attrs; mem_attr = hsw_mem_events_attrs; tsx_attr = hsw_tsx_events_attrs; pr_cont("Broadwell events, "); @@ -4890,9 +4953,10 @@ __init int intel_pmu_init(void) name = "knights-landing"; break; + case INTEL_FAM6_SKYLAKE_X: + pmem = true; case INTEL_FAM6_SKYLAKE_MOBILE: case INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: x86_add_quirk(intel_pebs_isolation_quirk); @@ -4920,27 +4984,28 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = hsw_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; - extra_attr = merge_attr(extra_attr, skl_format_attr); - to_free = extra_attr; - x86_pmu.cpu_events = hsw_events_attrs; + extra_skl_attr = skl_format_attr; + td_attr = hsw_events_attrs; mem_attr = hsw_mem_events_attrs; tsx_attr = hsw_tsx_events_attrs; - intel_pmu_pebs_data_source_skl( - boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); + intel_pmu_pebs_data_source_skl(pmem); if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) { x86_pmu.flags |= PMU_FL_TFA; x86_pmu.get_event_constraints = tfa_get_event_constraints; x86_pmu.enable_all = intel_tfa_pmu_enable_all; x86_pmu.commit_scheduling = intel_tfa_commit_scheduling; - intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr; } pr_cont("Skylake events, "); name = "skylake"; break; + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_XEON_D: + pmem = true; case INTEL_FAM6_ICELAKE_MOBILE: + case INTEL_FAM6_ICELAKE_DESKTOP: x86_pmu.late_ack = true; memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); @@ -4959,11 +5024,12 @@ __init int intel_pmu_init(void) x86_pmu.get_event_constraints = icl_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
hsw_format_attr : nhm_format_attr; - extra_attr = merge_attr(extra_attr, skl_format_attr); - x86_pmu.cpu_events = get_icl_events_attrs(); + extra_skl_attr = skl_format_attr; + mem_attr = icl_events_attrs; + tsx_attr = icl_tsx_events_attrs; x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02); x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_skl(false); + intel_pmu_pebs_data_source_skl(pmem); pr_cont("Icelake events, "); name = "icelake"; break; @@ -4988,14 +5054,14 @@ __init int intel_pmu_init(void) snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name); - if (version >= 2 && extra_attr) { - x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, - extra_attr); - WARN_ON(!x86_pmu.format_attrs); - } - x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events, - mem_attr, tsx_attr); + group_events_td.attrs = td_attr; + group_events_mem.attrs = mem_attr; + group_events_tsx.attrs = tsx_attr; + group_format_extra.attrs = extra_attr; + group_format_extra_skl.attrs = extra_skl_attr; + + x86_pmu.attr_update = attr_update; if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", @@ -5043,12 +5109,8 @@ __init int intel_pmu_init(void) x86_pmu.lbr_nr = 0; } - x86_pmu.caps_attrs = intel_pmu_caps_attrs; - - if (x86_pmu.lbr_nr) { - x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs); + if (x86_pmu.lbr_nr) pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); - } /* * Access extra MSR may cause #GP under certain circumstances. @@ -5078,7 +5140,6 @@ __init int intel_pmu_init(void) if (x86_pmu.counter_freezing) x86_pmu.handle_irq = intel_pmu_handle_irq_v4; - kfree(to_free); return 0; } diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 6072f92cb8ea..688592b34564 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -96,6 +96,7 @@ #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "../perf_event.h" +#include "../probe.h" MODULE_LICENSE("GPL"); @@ -144,25 +145,42 @@ enum perf_cstate_core_events { PERF_CSTATE_CORE_EVENT_MAX, }; -PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00"); -PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01"); -PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02"); -PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03"); +PMU_EVENT_ATTR_STRING(c1-residency, attr_cstate_core_c1, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_core_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_core_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_core_c7, "event=0x03"); -static struct perf_cstate_msr core_msr[] = { - [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 }, - [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 }, - [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 }, - [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 }, +static unsigned long core_msr_mask; + +PMU_EVENT_GROUP(events, cstate_core_c1); +PMU_EVENT_GROUP(events, cstate_core_c3); +PMU_EVENT_GROUP(events, cstate_core_c6); +PMU_EVENT_GROUP(events, cstate_core_c7); + +static bool test_msr(int idx, void *data) +{ + return test_bit(idx, (unsigned long *) data); +} + +static struct perf_msr core_msr[] = { + [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &group_cstate_core_c1, test_msr }, + [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, 
&group_cstate_core_c3, test_msr }, + [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &group_cstate_core_c6, test_msr }, + [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &group_cstate_core_c7, test_msr }, }; -static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = { +static struct attribute *attrs_empty[] = { NULL, }; +/* + * There are no default events, but we need to create + * "events" group (with empty attrs) before updating + * it with detected events. + */ static struct attribute_group core_events_attr_group = { .name = "events", - .attrs = core_events_attrs, + .attrs = attrs_empty, }; DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63"); @@ -211,31 +229,37 @@ enum perf_cstate_pkg_events { PERF_CSTATE_PKG_EVENT_MAX, }; -PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00"); -PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01"); -PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02"); -PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_pkg_c7, "event=0x03"); -PMU_EVENT_ATTR_STRING(c8-residency, evattr_cstate_pkg_c8, "event=0x04"); -PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05"); -PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06"); - -static struct perf_cstate_msr pkg_msr[] = { - [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 }, - [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 }, - [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 }, - [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 }, - [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 }, - [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 }, - [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 }, -}; - -static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = { - NULL, +PMU_EVENT_ATTR_STRING(c2-residency, attr_cstate_pkg_c2, "event=0x00"); +PMU_EVENT_ATTR_STRING(c3-residency, attr_cstate_pkg_c3, "event=0x01"); +PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_pkg_c6, "event=0x02"); +PMU_EVENT_ATTR_STRING(c7-residency, attr_cstate_pkg_c7, "event=0x03"); +PMU_EVENT_ATTR_STRING(c8-residency, attr_cstate_pkg_c8, "event=0x04"); +PMU_EVENT_ATTR_STRING(c9-residency, attr_cstate_pkg_c9, "event=0x05"); +PMU_EVENT_ATTR_STRING(c10-residency, attr_cstate_pkg_c10, "event=0x06"); + +static unsigned long pkg_msr_mask; + +PMU_EVENT_GROUP(events, cstate_pkg_c2); +PMU_EVENT_GROUP(events, cstate_pkg_c3); +PMU_EVENT_GROUP(events, cstate_pkg_c6); +PMU_EVENT_GROUP(events, cstate_pkg_c7); +PMU_EVENT_GROUP(events, cstate_pkg_c8); +PMU_EVENT_GROUP(events, cstate_pkg_c9); +PMU_EVENT_GROUP(events, cstate_pkg_c10); + +static struct perf_msr pkg_msr[] = { + [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &group_cstate_pkg_c2, test_msr }, + [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &group_cstate_pkg_c3, test_msr }, + [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &group_cstate_pkg_c6, test_msr }, + [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &group_cstate_pkg_c7, test_msr }, + [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &group_cstate_pkg_c8, test_msr }, + [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &group_cstate_pkg_c9, test_msr }, + [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr }, }; static struct attribute_group pkg_events_attr_group = { .name = "events", - .attrs = 
pkg_events_attrs, + .attrs = attrs_empty, }; DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63"); @@ -289,7 +313,8 @@ static int cstate_pmu_event_init(struct perf_event *event) if (event->pmu == &cstate_core_pmu) { if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) return -EINVAL; - if (!core_msr[cfg].attr) + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_CORE_EVENT_MAX); + if (!(core_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = core_msr[cfg].msr; cpu = cpumask_any_and(&cstate_core_cpu_mask, @@ -298,11 +323,11 @@ static int cstate_pmu_event_init(struct perf_event *event) if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); - if (!pkg_msr[cfg].attr) + if (!(pkg_msr_mask & (1 << cfg))) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; cpu = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_core_cpumask(event->cpu)); + topology_die_cpumask(event->cpu)); } else { return -ENOENT; } @@ -385,7 +410,7 @@ static int cstate_cpu_exit(unsigned int cpu) if (has_cstate_pkg && cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) { - target = cpumask_any_but(topology_core_cpumask(cpu), cpu); + target = cpumask_any_but(topology_die_cpumask(cpu), cpu); /* Migrate events if there is a valid target */ if (target < nr_cpu_ids) { cpumask_set_cpu(target, &cstate_pkg_cpu_mask); @@ -414,15 +439,35 @@ static int cstate_cpu_init(unsigned int cpu) * in the package cpu mask as the designated reader. */ target = cpumask_any_and(&cstate_pkg_cpu_mask, - topology_core_cpumask(cpu)); + topology_die_cpumask(cpu)); if (has_cstate_pkg && target >= nr_cpu_ids) cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); return 0; } +const struct attribute_group *core_attr_update[] = { + &group_cstate_core_c1, + &group_cstate_core_c3, + &group_cstate_core_c6, + &group_cstate_core_c7, + NULL, +}; + +const struct attribute_group *pkg_attr_update[] = { + &group_cstate_pkg_c2, + &group_cstate_pkg_c3, + &group_cstate_pkg_c6, + &group_cstate_pkg_c7, + &group_cstate_pkg_c8, + &group_cstate_pkg_c9, + &group_cstate_pkg_c10, + NULL, +}; + static struct pmu cstate_core_pmu = { .attr_groups = core_attr_groups, + .attr_update = core_attr_update, .name = "cstate_core", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, @@ -437,6 +482,7 @@ static struct pmu cstate_core_pmu = { static struct pmu cstate_pkg_pmu = { .attr_groups = pkg_attr_groups, + .attr_update = pkg_attr_update, .name = "cstate_pkg", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, @@ -580,35 +626,11 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_DESKTOP, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); -/* - * Probe the cstate events and insert the available one into sysfs attrs - * Return false if there are no available events. 
- */ -static bool __init cstate_probe_msr(const unsigned long evmsk, int max, - struct perf_cstate_msr *msr, - struct attribute **attrs) -{ - bool found = false; - unsigned int bit; - u64 val; - - for (bit = 0; bit < max; bit++) { - if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) { - *attrs++ = &msr[bit].attr->attr.attr; - found = true; - } else { - msr[bit].attr = NULL; - } - } - *attrs = NULL; - - return found; -} - static int __init cstate_probe(const struct cstate_model *cm) { /* SLM has different MSR for PKG C6 */ @@ -620,13 +642,14 @@ static int __init cstate_probe(const struct cstate_model *cm) pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; - has_cstate_core = cstate_probe_msr(cm->core_events, - PERF_CSTATE_CORE_EVENT_MAX, - core_msr, core_events_attrs); + core_msr_mask = perf_msr_probe(core_msr, PERF_CSTATE_CORE_EVENT_MAX, + true, (void *) &cm->core_events); - has_cstate_pkg = cstate_probe_msr(cm->pkg_events, - PERF_CSTATE_PKG_EVENT_MAX, - pkg_msr, pkg_events_attrs); + pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX, + true, (void *) &cm->pkg_events); + + has_cstate_core = !!core_msr_mask; + has_cstate_pkg = !!pkg_msr_mask; return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; } @@ -663,7 +686,13 @@ static int __init cstate_init(void) } if (has_cstate_pkg) { - err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1); + if (topology_max_die_per_package() > 1) { + err = perf_pmu_register(&cstate_pkg_pmu, + "cstate_die", -1); + } else { + err = perf_pmu_register(&cstate_pkg_pmu, + cstate_pkg_pmu.name, -1); + } if (err) { has_cstate_pkg = false; pr_info("Failed to register cstate pkg pmu\n"); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7acc526b4ad2..2c8db2c19328 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -337,7 +337,7 @@ static int alloc_pebs_buffer(int cpu) struct debug_store *ds = hwev->ds; size_t bsiz = x86_pmu.pebs_buffer_size; int max, node = cpu_to_node(cpu); - void *buffer, *ibuffer, *cea; + void *buffer, *insn_buff, *cea; if (!x86_pmu.pebs) return 0; @@ -351,12 +351,12 @@ static int alloc_pebs_buffer(int cpu) * buffer then. */ if (x86_pmu.intel_cap.pebs_format < 2) { - ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); - if (!ibuffer) { + insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); + if (!insn_buff) { dsfree_pages(buffer, bsiz); return -ENOMEM; } - per_cpu(insn_buffer, cpu) = ibuffer; + per_cpu(insn_buffer, cpu) = insn_buff; } hwev->ds_pebs_vaddr = buffer; /* Update the cpu entry area mapping */ @@ -987,7 +987,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) pebs_data_cfg |= PEBS_DATACFG_GP; if ((sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_XMM_REGS)) + (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK)) pebs_data_cfg |= PEBS_DATACFG_XMMS; if (sample_type & PERF_SAMPLE_BRANCH_STACK) { @@ -1964,10 +1964,9 @@ void __init intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; - if (x86_pmu.version <= 4) { + if (x86_pmu.version <= 4) x86_pmu.pebs_no_isolation = 1; - x86_pmu.pebs_no_xmm_regs = 1; - } + if (x86_pmu.pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? 
'+' : '-'; char *pebs_qual = ""; @@ -2020,9 +2019,9 @@ void __init intel_ds_init(void) PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; pebs_qual = "-baseline"; + x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { /* Only basic record supported */ - x86_pmu.pebs_no_xmm_regs = 1; x86_pmu.large_pebs_flags &= ~(PERF_SAMPLE_ADDR | PERF_SAMPLE_TIME | diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 26c03f5adfb9..64ab51ffdf06 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -55,27 +55,28 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/perf_event.h> +#include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "../perf_event.h" +#include "../probe.h" MODULE_LICENSE("GPL"); /* * RAPL energy status counters */ -#define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */ -#define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */ -#define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */ -#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ -#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ -#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ -#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ -#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ -#define RAPL_IDX_PSYS_NRG_STAT 4 /* psys */ -#define INTEL_RAPL_PSYS 0x5 /* pseudo-encoding */ - -#define NR_RAPL_DOMAINS 0x5 +enum perf_rapl_events { + PERF_RAPL_PP0 = 0, /* all cores */ + PERF_RAPL_PKG, /* entire package */ + PERF_RAPL_RAM, /* DRAM */ + PERF_RAPL_PP1, /* gpu */ + PERF_RAPL_PSYS, /* psys */ + + PERF_RAPL_MAX, + NR_RAPL_DOMAINS = PERF_RAPL_MAX, +}; + static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "pp0-core", "package", @@ -84,33 +85,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "psys", }; -/* Clients have PP0, PKG */ -#define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_PP1_NRG_STAT) - -/* Servers have PP0, PKG, RAM */ -#define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT) - -/* Servers have PP0, PKG, RAM, PP1 */ -#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT|\ - 1<<RAPL_IDX_PP1_NRG_STAT) - -/* SKL clients have PP0, PKG, RAM, PP1, PSYS */ -#define RAPL_IDX_SKL_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ - 1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT|\ - 1<<RAPL_IDX_PP1_NRG_STAT|\ - 1<<RAPL_IDX_PSYS_NRG_STAT) - -/* Knights Landing has PKG, RAM */ -#define RAPL_IDX_KNL (1<<RAPL_IDX_PKG_NRG_STAT|\ - 1<<RAPL_IDX_RAM_NRG_STAT) - /* * event code: LSB 8 bits, passed in attr->config * any other bit is reserved @@ -149,26 +123,32 @@ struct rapl_pmu { struct rapl_pmus { struct pmu pmu; - unsigned int maxpkg; + unsigned int maxdie; struct rapl_pmu *pmus[]; }; +struct rapl_model { + unsigned long events; + bool apply_quirk; +}; + /* 1/2^hw_unit Joule */ static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; static struct rapl_pmus *rapl_pmus; static cpumask_t rapl_cpu_mask; static unsigned int rapl_cntr_mask; static u64 rapl_timer_ms; +static struct perf_msr rapl_msrs[]; static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) { - unsigned int pkgid = topology_logical_package_id(cpu); + unsigned int dieid = topology_logical_die_id(cpu); /* * The unsigned check also catches the '-1' return value for non * existent mappings in the topology map. */ - return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL; + return dieid < rapl_pmus->maxdie ? 
rapl_pmus->pmus[dieid] : NULL; } static inline u64 rapl_read_counter(struct perf_event *event) @@ -350,7 +330,7 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags) static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; - int bit, msr, ret = 0; + int bit, ret = 0; struct rapl_pmu *pmu; /* only look at RAPL events */ @@ -366,33 +346,12 @@ static int rapl_pmu_event_init(struct perf_event *event) event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - /* - * check event is known (determines counter) - */ - switch (cfg) { - case INTEL_RAPL_PP0: - bit = RAPL_IDX_PP0_NRG_STAT; - msr = MSR_PP0_ENERGY_STATUS; - break; - case INTEL_RAPL_PKG: - bit = RAPL_IDX_PKG_NRG_STAT; - msr = MSR_PKG_ENERGY_STATUS; - break; - case INTEL_RAPL_RAM: - bit = RAPL_IDX_RAM_NRG_STAT; - msr = MSR_DRAM_ENERGY_STATUS; - break; - case INTEL_RAPL_PP1: - bit = RAPL_IDX_PP1_NRG_STAT; - msr = MSR_PP1_ENERGY_STATUS; - break; - case INTEL_RAPL_PSYS: - bit = RAPL_IDX_PSYS_NRG_STAT; - msr = MSR_PLATFORM_ENERGY_STATUS; - break; - default: + if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) return -EINVAL; - } + + cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); + bit = cfg - 1; + /* check event supported */ if (!(rapl_cntr_mask & (1 << bit))) return -EINVAL; @@ -407,7 +366,7 @@ static int rapl_pmu_event_init(struct perf_event *event) return -EINVAL; event->cpu = pmu->cpu; event->pmu_private = pmu; - event->hw.event_base = msr; + event->hw.event_base = rapl_msrs[bit].msr; event->hw.config = cfg; event->hw.idx = bit; @@ -457,110 +416,111 @@ RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890 RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); -static struct attribute *rapl_events_srv_attr[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_ram), +/* + * There are no default events, but we need to create + * "events" group (with empty attrs) before updating + * it with detected events. 
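The rewritten rapl_pmu_event_init() above replaces the open-coded switch with arithmetic: userspace passes an event number 1..NR_RAPL_DOMAINS in config:0-7, the driver rejects 0 and out-of-range values, clamps the index against speculation, and uses bit = cfg - 1 both as the counter-mask bit and as the index into rapl_msrs[]. (As an aside, the sysfs scale string 2.3283064365386962890625e-10 is exactly 2^-32, i.e. the exported count is defined in units of 2^-32 Joule.) A small illustrative sketch of the mapping, with MSR numbers as commonly documented:

/* Sketch of the config -> bit -> MSR mapping; table contents illustrative. */
#include <stdio.h>

enum { RAPL_PP0, RAPL_PKG, RAPL_RAM, RAPL_PP1, RAPL_PSYS, RAPL_MAX };

static const unsigned int rapl_msr[RAPL_MAX] = {
        [RAPL_PP0]  = 0x639,    /* MSR_PP0_ENERGY_STATUS */
        [RAPL_PKG]  = 0x611,    /* MSR_PKG_ENERGY_STATUS */
        [RAPL_RAM]  = 0x619,    /* MSR_DRAM_ENERGY_STATUS */
        [RAPL_PP1]  = 0x641,    /* MSR_PP1_ENERGY_STATUS */
        [RAPL_PSYS] = 0x64d,    /* MSR_PLATFORM_ENERGY_STATUS */
};

static int lookup(unsigned long cfg, unsigned int cntr_mask, unsigned int *msr)
{
        int bit;

        if (!cfg || cfg > RAPL_MAX)     /* events are numbered 1..RAPL_MAX */
                return -1;
        bit = cfg - 1;                  /* the kernel also clamps with array_index_nospec() */
        if (!(cntr_mask & (1U << bit)))
                return -1;              /* domain not probed on this model */
        *msr = rapl_msr[bit];
        return bit;
}

int main(void)
{
        unsigned int msr;
        int bit = lookup(2, 0x1f, &msr);        /* config=2 is the package domain */

        printf("bit=%d msr=%#x\n", bit, msr);
        return 0;
}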
+ */ +static struct attribute *attrs_empty[] = { + NULL, +}; - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_ram_unit), +static struct attribute_group rapl_pmu_events_group = { + .name = "events", + .attrs = attrs_empty, +}; - EVENT_PTR(rapl_cores_scale), - EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_ram_scale), +DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +static struct attribute *rapl_formats_attr[] = { + &format_attr_event.attr, NULL, }; -static struct attribute *rapl_events_cln_attr[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_gpu), - - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_gpu_unit), +static struct attribute_group rapl_pmu_format_group = { + .name = "format", + .attrs = rapl_formats_attr, +}; - EVENT_PTR(rapl_cores_scale), - EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_gpu_scale), +static const struct attribute_group *rapl_attr_groups[] = { + &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, NULL, }; -static struct attribute *rapl_events_hsw_attr[] = { +static struct attribute *rapl_events_cores[] = { EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_gpu), - EVENT_PTR(rapl_ram), - EVENT_PTR(rapl_cores_unit), - EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_gpu_unit), - EVENT_PTR(rapl_ram_unit), - EVENT_PTR(rapl_cores_scale), - EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_gpu_scale), - EVENT_PTR(rapl_ram_scale), NULL, }; -static struct attribute *rapl_events_skl_attr[] = { - EVENT_PTR(rapl_cores), - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_gpu), - EVENT_PTR(rapl_ram), - EVENT_PTR(rapl_psys), +static struct attribute_group rapl_events_cores_group = { + .name = "events", + .attrs = rapl_events_cores, +}; - EVENT_PTR(rapl_cores_unit), +static struct attribute *rapl_events_pkg[] = { + EVENT_PTR(rapl_pkg), EVENT_PTR(rapl_pkg_unit), - EVENT_PTR(rapl_gpu_unit), - EVENT_PTR(rapl_ram_unit), - EVENT_PTR(rapl_psys_unit), - - EVENT_PTR(rapl_cores_scale), EVENT_PTR(rapl_pkg_scale), - EVENT_PTR(rapl_gpu_scale), - EVENT_PTR(rapl_ram_scale), - EVENT_PTR(rapl_psys_scale), NULL, }; -static struct attribute *rapl_events_knl_attr[] = { - EVENT_PTR(rapl_pkg), - EVENT_PTR(rapl_ram), +static struct attribute_group rapl_events_pkg_group = { + .name = "events", + .attrs = rapl_events_pkg, +}; - EVENT_PTR(rapl_pkg_unit), +static struct attribute *rapl_events_ram[] = { + EVENT_PTR(rapl_ram), EVENT_PTR(rapl_ram_unit), - - EVENT_PTR(rapl_pkg_scale), EVENT_PTR(rapl_ram_scale), NULL, }; -static struct attribute_group rapl_pmu_events_group = { - .name = "events", - .attrs = NULL, /* patched at runtime */ +static struct attribute_group rapl_events_ram_group = { + .name = "events", + .attrs = rapl_events_ram, }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); -static struct attribute *rapl_formats_attr[] = { - &format_attr_event.attr, +static struct attribute *rapl_events_gpu[] = { + EVENT_PTR(rapl_gpu), + EVENT_PTR(rapl_gpu_unit), + EVENT_PTR(rapl_gpu_scale), NULL, }; -static struct attribute_group rapl_pmu_format_group = { - .name = "format", - .attrs = rapl_formats_attr, +static struct attribute_group rapl_events_gpu_group = { + .name = "events", + .attrs = rapl_events_gpu, }; -static const struct attribute_group *rapl_attr_groups[] = { - &rapl_pmu_attr_group, - &rapl_pmu_format_group, - &rapl_pmu_events_group, +static struct attribute *rapl_events_psys[] = { + EVENT_PTR(rapl_psys), + EVENT_PTR(rapl_psys_unit), + EVENT_PTR(rapl_psys_scale), NULL, }; +static struct attribute_group 
rapl_events_psys_group = { + .name = "events", + .attrs = rapl_events_psys, +}; + +static bool test_msr(int idx, void *data) +{ + return test_bit(idx, (unsigned long *) data); +} + +static struct perf_msr rapl_msrs[] = { + [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr }, + [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, + [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr }, + [PERF_RAPL_PP1] = { MSR_PP1_ENERGY_STATUS, &rapl_events_gpu_group, test_msr }, + [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr }, +}; + static int rapl_cpu_offline(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); @@ -572,7 +532,7 @@ static int rapl_cpu_offline(unsigned int cpu) pmu->cpu = -1; /* Find a new cpu to collect rapl events */ - target = cpumask_any_but(topology_core_cpumask(cpu), cpu); + target = cpumask_any_but(topology_die_cpumask(cpu), cpu); /* Migrate rapl events to the new target */ if (target < nr_cpu_ids) { @@ -599,14 +559,14 @@ static int rapl_cpu_online(unsigned int cpu) pmu->timer_interval = ms_to_ktime(rapl_timer_ms); rapl_hrtimer_init(pmu); - rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; + rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu; } /* * Check if there is an online cpu in the package which collects rapl * events already. */ - target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu)); + target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; @@ -633,7 +593,7 @@ static int rapl_check_hw_unit(bool apply_quirk) * of 2. Datasheet, September 2014, Reference Number: 330784-001 " */ if (apply_quirk) - rapl_hw_unit[RAPL_IDX_RAM_NRG_STAT] = 16; + rapl_hw_unit[PERF_RAPL_RAM] = 16; /* * Calculate the timer rate: @@ -669,23 +629,33 @@ static void cleanup_rapl_pmus(void) { int i; - for (i = 0; i < rapl_pmus->maxpkg; i++) + for (i = 0; i < rapl_pmus->maxdie; i++) kfree(rapl_pmus->pmus[i]); kfree(rapl_pmus); } +const struct attribute_group *rapl_attr_update[] = { + &rapl_events_cores_group, + &rapl_events_pkg_group, + &rapl_events_ram_group, + &rapl_events_gpu_group, + &rapl_events_gpu_group, + NULL, +}; + static int __init init_rapl_pmus(void) { - int maxpkg = topology_max_packages(); + int maxdie = topology_max_packages() * topology_max_die_per_package(); size_t size; - size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *); + size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *); rapl_pmus = kzalloc(size, GFP_KERNEL); if (!rapl_pmus) return -ENOMEM; - rapl_pmus->maxpkg = maxpkg; + rapl_pmus->maxdie = maxdie; rapl_pmus->pmu.attr_groups = rapl_attr_groups; + rapl_pmus->pmu.attr_update = rapl_attr_update; rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; rapl_pmus->pmu.event_init = rapl_pmu_event_init; rapl_pmus->pmu.add = rapl_pmu_event_add; @@ -701,105 +671,96 @@ static int __init init_rapl_pmus(void) #define X86_RAPL_MODEL_MATCH(model, init) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } -struct intel_rapl_init_fun { - bool apply_quirk; - int cntr_mask; - struct attribute **attrs; -}; - -static const struct intel_rapl_init_fun snb_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_CLN, - .attrs = rapl_events_cln_attr, +static struct rapl_model model_snb = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_PP1), + .apply_quirk = false, }; -static const struct intel_rapl_init_fun hsx_rapl_init 
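Several hunks in this file switch from one PMU instance per package to one per die: the instance array is sized as topology_max_packages() * topology_max_die_per_package(), and lookups index it by the logical die id, relying on an unsigned comparison so the -1 returned for an unmapped CPU falls out of range. A compact sketch of that sizing and lookup (names hypothetical):

/* Sketch of the per-die instance array used by the RAPL/cstate/uncore code. */
#include <stdio.h>
#include <stdlib.h>

struct pmus {
        unsigned int    maxdie;
        void            *pmus[];        /* one slot per logical die */
};

static struct pmus *alloc_pmus(unsigned int packages, unsigned int dies_per_pkg)
{
        unsigned int maxdie = packages * dies_per_pkg;
        struct pmus *p = calloc(1, sizeof(*p) + maxdie * sizeof(void *));

        if (p)
                p->maxdie = maxdie;
        return p;
}

static void *die_to_pmu(struct pmus *p, int logical_die_id)
{
        unsigned int dieid = logical_die_id;    /* -1 wraps to UINT_MAX ... */

        return dieid < p->maxdie ? p->pmus[dieid] : NULL;   /* ... and is rejected */
}

int main(void)
{
        struct pmus *p = alloc_pmus(2, 2);      /* e.g. 2 packages x 2 dies */

        p->pmus[1] = p;                         /* pretend die 1 has an instance */
        printf("%p %p\n", die_to_pmu(p, 1), die_to_pmu(p, -1));
        free(p);
        return 0;
}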
__initconst = { - .apply_quirk = true, - .cntr_mask = RAPL_IDX_SRV, - .attrs = rapl_events_srv_attr, +static struct rapl_model model_snbep = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .apply_quirk = false, }; -static const struct intel_rapl_init_fun hsw_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_HSW, - .attrs = rapl_events_hsw_attr, +static struct rapl_model model_hsw = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PP1), + .apply_quirk = false, }; -static const struct intel_rapl_init_fun snbep_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_SRV, - .attrs = rapl_events_srv_attr, +static struct rapl_model model_hsx = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .apply_quirk = true, }; -static const struct intel_rapl_init_fun knl_rapl_init __initconst = { - .apply_quirk = true, - .cntr_mask = RAPL_IDX_KNL, - .attrs = rapl_events_knl_attr, +static struct rapl_model model_knl = { + .events = BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM), + .apply_quirk = true, }; -static const struct intel_rapl_init_fun skl_rapl_init __initconst = { - .apply_quirk = false, - .cntr_mask = RAPL_IDX_SKL_CLN, - .attrs = rapl_events_skl_attr, +static struct rapl_model model_skl = { + .events = BIT(PERF_RAPL_PP0) | + BIT(PERF_RAPL_PKG) | + BIT(PERF_RAPL_RAM) | + BIT(PERF_RAPL_PP1) | + BIT(PERF_RAPL_PSYS), + .apply_quirk = false, }; -static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, snb_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, snbep_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, snb_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), - - X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init), +static const struct x86_cpu_id rapl_model_match[] __initconst = { + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE, model_snb), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SANDYBRIDGE_X, model_snbep), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE, model_snb), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, model_snbep), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, model_hsw), + 
X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, model_knl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, model_knl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, model_hsx), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, model_hsw), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, model_skl), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, model_skl), {}, }; -MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match); +MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); static int __init rapl_pmu_init(void) { const struct x86_cpu_id *id; - struct intel_rapl_init_fun *rapl_init; - bool apply_quirk; + struct rapl_model *rm; int ret; - id = x86_match_cpu(rapl_cpu_match); + id = x86_match_cpu(rapl_model_match); if (!id) return -ENODEV; - rapl_init = (struct intel_rapl_init_fun *)id->driver_data; - apply_quirk = rapl_init->apply_quirk; - rapl_cntr_mask = rapl_init->cntr_mask; - rapl_pmu_events_group.attrs = rapl_init->attrs; + rm = (struct rapl_model *) id->driver_data; + rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, + false, (void *) &rm->events); - ret = rapl_check_hw_unit(apply_quirk); + ret = rapl_check_hw_unit(rm->apply_quirk); if (ret) return ret; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 9e3fbd47cb56..3694a5d0703d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -8,6 +8,7 @@ static struct intel_uncore_type *empty_uncore[] = { NULL, }; struct intel_uncore_type **uncore_msr_uncores = empty_uncore; struct intel_uncore_type **uncore_pci_uncores = empty_uncore; +struct intel_uncore_type **uncore_mmio_uncores = empty_uncore; static bool pcidrv_registered; struct pci_driver *uncore_pci_driver; @@ -15,7 +16,7 @@ struct pci_driver *uncore_pci_driver; DEFINE_RAW_SPINLOCK(pci2phy_map_lock); struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head); struct pci_extra_dev *uncore_extra_pci_dev; -static int max_packages; +static int max_dies; /* mask of cpus that collect uncore events */ static cpumask_t uncore_cpu_mask; @@ -28,7 +29,7 @@ struct event_constraint uncore_constraint_empty = MODULE_LICENSE("GPL"); -static int uncore_pcibus_to_physid(struct pci_bus *bus) +int uncore_pcibus_to_physid(struct pci_bus *bus) { struct pci2phy_map *map; int phys_id = -1; @@ -101,13 +102,13 @@ ssize_t uncore_event_show(struct kobject *kobj, struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - unsigned int pkgid = topology_logical_package_id(cpu); + unsigned int dieid = topology_logical_die_id(cpu); /* * The unsigned check also catches the '-1' return value for non * existent mappings in the topology map. */ - return pkgid < max_packages ? 
pmu->boxes[pkgid] : NULL; + return dieid < max_dies ? pmu->boxes[dieid] : NULL; } u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) @@ -119,6 +120,21 @@ u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *eve return count; } +void uncore_mmio_exit_box(struct intel_uncore_box *box) +{ + if (box->io_addr) + iounmap(box->io_addr); +} + +u64 uncore_mmio_read_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (!box->io_addr) + return 0; + + return readq(box->io_addr + event->hw.event_base); +} + /* * generic get constraint function for shared match/mask registers. */ @@ -312,7 +328,7 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, uncore_pmu_init_hrtimer(box); box->cpu = -1; box->pci_phys_id = -1; - box->pkgid = -1; + box->dieid = -1; /* set default hrtimer timeout */ box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; @@ -827,10 +843,10 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) static void uncore_free_boxes(struct intel_uncore_pmu *pmu) { - int pkg; + int die; - for (pkg = 0; pkg < max_packages; pkg++) - kfree(pmu->boxes[pkg]); + for (die = 0; die < max_dies; die++) + kfree(pmu->boxes[die]); kfree(pmu->boxes); } @@ -867,7 +883,7 @@ static int __init uncore_type_init(struct intel_uncore_type *type, bool setid) if (!pmus) return -ENOMEM; - size = max_packages * sizeof(struct intel_uncore_box *); + size = max_dies * sizeof(struct intel_uncore_box *); for (i = 0; i < type->num_boxes; i++) { pmus[i].func_id = setid ? i : -1; @@ -937,20 +953,21 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id struct intel_uncore_type *type; struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; - int phys_id, pkg, ret; + int phys_id, die, ret; phys_id = uncore_pcibus_to_physid(pdev->bus); if (phys_id < 0) return -ENODEV; - pkg = topology_phys_to_logical_pkg(phys_id); - if (pkg < 0) + die = (topology_max_die_per_package() > 1) ? 
phys_id : + topology_phys_to_logical_pkg(phys_id); + if (die < 0) return -EINVAL; if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) { int idx = UNCORE_PCI_DEV_IDX(id->driver_data); - uncore_extra_pci_dev[pkg].dev[idx] = pdev; + uncore_extra_pci_dev[die].dev[idx] = pdev; pci_set_drvdata(pdev, NULL); return 0; } @@ -989,7 +1006,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } - if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) + if (WARN_ON_ONCE(pmu->boxes[die] != NULL)) return -EINVAL; box = uncore_alloc_box(type, NUMA_NO_NODE); @@ -1003,13 +1020,13 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id atomic_inc(&box->refcnt); box->pci_phys_id = phys_id; - box->pkgid = pkg; + box->dieid = die; box->pci_dev = pdev; box->pmu = pmu; uncore_box_init(box); pci_set_drvdata(pdev, box); - pmu->boxes[pkg] = box; + pmu->boxes[die] = box; if (atomic_inc_return(&pmu->activeboxes) > 1) return 0; @@ -1017,7 +1034,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id ret = uncore_pmu_register(pmu); if (ret) { pci_set_drvdata(pdev, NULL); - pmu->boxes[pkg] = NULL; + pmu->boxes[die] = NULL; uncore_box_exit(box); kfree(box); } @@ -1028,16 +1045,17 @@ static void uncore_pci_remove(struct pci_dev *pdev) { struct intel_uncore_box *box; struct intel_uncore_pmu *pmu; - int i, phys_id, pkg; + int i, phys_id, die; phys_id = uncore_pcibus_to_physid(pdev->bus); box = pci_get_drvdata(pdev); if (!box) { - pkg = topology_phys_to_logical_pkg(phys_id); + die = (topology_max_die_per_package() > 1) ? phys_id : + topology_phys_to_logical_pkg(phys_id); for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { - if (uncore_extra_pci_dev[pkg].dev[i] == pdev) { - uncore_extra_pci_dev[pkg].dev[i] = NULL; + if (uncore_extra_pci_dev[die].dev[i] == pdev) { + uncore_extra_pci_dev[die].dev[i] = NULL; break; } } @@ -1050,7 +1068,7 @@ static void uncore_pci_remove(struct pci_dev *pdev) return; pci_set_drvdata(pdev, NULL); - pmu->boxes[box->pkgid] = NULL; + pmu->boxes[box->dieid] = NULL; if (atomic_dec_return(&pmu->activeboxes) == 0) uncore_pmu_unregister(pmu); uncore_box_exit(box); @@ -1062,7 +1080,7 @@ static int __init uncore_pci_init(void) size_t size; int ret; - size = max_packages * sizeof(struct pci_extra_dev); + size = max_dies * sizeof(struct pci_extra_dev); uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); if (!uncore_extra_pci_dev) { ret = -ENOMEM; @@ -1109,11 +1127,11 @@ static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, { struct intel_uncore_pmu *pmu = type->pmus; struct intel_uncore_box *box; - int i, pkg; + int i, die; - pkg = topology_logical_package_id(old_cpu < 0 ? new_cpu : old_cpu); + die = topology_logical_die_id(old_cpu < 0 ? 
new_cpu : old_cpu); for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; + box = pmu->boxes[die]; if (!box) continue; @@ -1141,18 +1159,33 @@ static void uncore_change_context(struct intel_uncore_type **uncores, uncore_change_type_ctx(*uncores, old_cpu, new_cpu); } -static int uncore_event_cpu_offline(unsigned int cpu) +static void uncore_box_unref(struct intel_uncore_type **types, int id) { - struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, pkg, target; + int i; + + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[id]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); + } + } +} + +static int uncore_event_cpu_offline(unsigned int cpu) +{ + int die, target; /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) goto unref; /* Find a new cpu to collect uncore events */ - target = cpumask_any_but(topology_core_cpumask(cpu), cpu); + target = cpumask_any_but(topology_die_cpumask(cpu), cpu); /* Migrate uncore events to the new target */ if (target < nr_cpu_ids) @@ -1161,25 +1194,19 @@ static int uncore_event_cpu_offline(unsigned int cpu) target = -1; uncore_change_context(uncore_msr_uncores, cpu, target); + uncore_change_context(uncore_mmio_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); unref: /* Clear the references */ - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box && atomic_dec_return(&box->refcnt) == 0) - uncore_box_exit(box); - } - } + die = topology_logical_die_id(cpu); + uncore_box_unref(uncore_msr_uncores, die); + uncore_box_unref(uncore_mmio_uncores, die); return 0; } static int allocate_boxes(struct intel_uncore_type **types, - unsigned int pkg, unsigned int cpu) + unsigned int die, unsigned int cpu) { struct intel_uncore_box *box, *tmp; struct intel_uncore_type *type; @@ -1192,20 +1219,20 @@ static int allocate_boxes(struct intel_uncore_type **types, type = *types; pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { - if (pmu->boxes[pkg]) + if (pmu->boxes[die]) continue; box = uncore_alloc_box(type, cpu_to_node(cpu)); if (!box) goto cleanup; box->pmu = pmu; - box->pkgid = pkg; + box->dieid = die; list_add(&box->active_list, &allocated); } } /* Install them in the pmus */ list_for_each_entry_safe(box, tmp, &allocated, active_list) { list_del_init(&box->active_list); - box->pmu->boxes[pkg] = box; + box->pmu->boxes[die] = box; } return 0; @@ -1217,15 +1244,15 @@ cleanup: return -ENOMEM; } -static int uncore_event_cpu_online(unsigned int cpu) +static int uncore_box_ref(struct intel_uncore_type **types, + int id, unsigned int cpu) { - struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_type *type; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, ret, pkg, target; + int i, ret; - pkg = topology_logical_package_id(cpu); - ret = allocate_boxes(types, pkg, cpu); + ret = allocate_boxes(types, id, cpu); if (ret) return ret; @@ -1233,23 +1260,38 @@ static int uncore_event_cpu_online(unsigned int cpu) type = *types; pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; + box = pmu->boxes[id]; if (box && 
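The hotplug callbacks above (and the analogous ones in the cstate and RAPL drivers) keep exactly one "reader" CPU per die in a global cpumask: when that CPU goes offline, any other CPU of the same die takes over and the active events migrate to it; when a CPU comes online and its die already has a reader, nothing changes. A simplified, self-contained sketch of that bookkeeping using plain bitmasks instead of the kernel cpumask API (online-state tracking omitted for brevity):

/* Sketch of the one-reader-per-die scheme; not the kernel implementation. */
#include <stdio.h>

#define NCPUS 8

static unsigned int reader_mask;                        /* CPUs currently collecting events */
static const unsigned int die_mask[NCPUS] = {           /* CPUs sharing a die with cpu i */
        0x0f, 0x0f, 0x0f, 0x0f,                         /* die 0: cpus 0-3 */
        0xf0, 0xf0, 0xf0, 0xf0,                         /* die 1: cpus 4-7 */
};

static void cpu_online(int cpu)
{
        if (reader_mask & die_mask[cpu])
                return;                                 /* die already has a reader */
        reader_mask |= 1U << cpu;
}

static void cpu_offline(int cpu)
{
        unsigned int peers;
        int target;

        if (!(reader_mask & (1U << cpu)))
                return;                                 /* not a reader, nothing to do */
        reader_mask &= ~(1U << cpu);
        peers = die_mask[cpu] & ~(1U << cpu);           /* other CPUs of the same die */
        for (target = 0; target < NCPUS; target++) {
                if (peers & (1U << target)) {
                        reader_mask |= 1U << target;    /* events would migrate here */
                        break;
                }
        }
}

int main(void)
{
        cpu_online(0);          /* becomes reader for die 0 */
        cpu_online(1);          /* die 0 already covered */
        cpu_online(4);          /* becomes reader for die 1 */
        cpu_offline(0);         /* reader role moves to another die-0 CPU */
        printf("reader_mask=%#x\n", reader_mask);       /* expect 0x12 */
        return 0;
}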
atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } + return 0; +} + +static int uncore_event_cpu_online(unsigned int cpu) +{ + int die, target, msr_ret, mmio_ret; + + die = topology_logical_die_id(cpu); + msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu); + mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu); + if (msr_ret && mmio_ret) + return -ENOMEM; /* * Check if there is an online cpu in the package * which collects uncore events already. */ - target = cpumask_any_and(&uncore_cpu_mask, topology_core_cpumask(cpu)); + target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu)); if (target < nr_cpu_ids) return 0; cpumask_set_cpu(cpu, &uncore_cpu_mask); - uncore_change_context(uncore_msr_uncores, -1, cpu); + if (!msr_ret) + uncore_change_context(uncore_msr_uncores, -1, cpu); + if (!mmio_ret) + uncore_change_context(uncore_mmio_uncores, -1, cpu); uncore_change_context(uncore_pci_uncores, -1, cpu); return 0; } @@ -1297,12 +1339,35 @@ err: return ret; } +static int __init uncore_mmio_init(void) +{ + struct intel_uncore_type **types = uncore_mmio_uncores; + int ret; + + ret = uncore_types_init(types, true); + if (ret) + goto err; + + for (; *types; types++) { + ret = type_pmu_register(*types); + if (ret) + goto err; + } + return 0; +err: + uncore_types_exit(uncore_mmio_uncores); + uncore_mmio_uncores = empty_uncore; + return ret; +} + + #define X86_UNCORE_MODEL_MATCH(model, init) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } struct intel_uncore_init_fun { void (*cpu_init)(void); int (*pci_init)(void); + void (*mmio_init)(void); }; static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { @@ -1373,6 +1438,12 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = { .pci_init = skl_uncore_pci_init, }; +static const struct intel_uncore_init_fun snr_uncore_init __initconst = { + .cpu_init = snr_uncore_cpu_init, + .pci_init = snr_uncore_pci_init, + .mmio_init = snr_uncore_mmio_init, +}; + static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init), @@ -1400,6 +1471,9 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_DESKTOP, icl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_X, snr_uncore_init), {}, }; @@ -1409,7 +1483,7 @@ static int __init intel_uncore_init(void) { const struct x86_cpu_id *id; struct intel_uncore_init_fun *uncore_init; - int pret = 0, cret = 0, ret; + int pret = 0, cret = 0, mret = 0, ret; id = x86_match_cpu(intel_uncore_match); if (!id) @@ -1418,7 +1492,7 @@ static int __init intel_uncore_init(void) if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return -ENODEV; - max_packages = topology_max_packages(); + max_dies = topology_max_packages() * topology_max_die_per_package(); uncore_init = (struct intel_uncore_init_fun *)id->driver_data; if (uncore_init->pci_init) { @@ -1432,7 +1506,12 @@ static int __init intel_uncore_init(void) cret = uncore_cpu_init(); } - if (cret && pret) + if (uncore_init->mmio_init) { + uncore_init->mmio_init(); + mret = uncore_mmio_init(); + } + + if (cret && pret && mret) return -ENODEV; /* 
Install hotplug callbacks to setup the targets for each package */ @@ -1446,6 +1525,7 @@ static int __init intel_uncore_init(void) err: uncore_types_exit(uncore_msr_uncores); + uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); return ret; } @@ -1455,6 +1535,7 @@ static void __exit intel_uncore_exit(void) { cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); uncore_types_exit(uncore_msr_uncores); + uncore_types_exit(uncore_mmio_uncores); uncore_pci_exit(); } module_exit(intel_uncore_exit); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79eb2e21e4f0..f36f7bebbc1b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -2,6 +2,7 @@ #include <linux/slab.h> #include <linux/pci.h> #include <asm/apicdef.h> +#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/perf_event.h> #include "../perf_event.h" @@ -56,7 +57,10 @@ struct intel_uncore_type { unsigned fixed_ctr; unsigned fixed_ctl; unsigned box_ctl; - unsigned msr_offset; + union { + unsigned msr_offset; + unsigned mmio_offset; + }; unsigned num_shared_regs:8; unsigned single_fixed:1; unsigned pair_ctr_ctl:1; @@ -108,7 +112,7 @@ struct intel_uncore_extra_reg { struct intel_uncore_box { int pci_phys_id; - int pkgid; /* Logical package ID */ + int dieid; /* Logical die ID */ int n_active; /* number of active events */ int n_events; int cpu; /* cpu to collect events */ @@ -125,7 +129,7 @@ struct intel_uncore_box { struct hrtimer hrtimer; struct list_head list; struct list_head active_list; - void *io_addr; + void __iomem *io_addr; struct intel_uncore_extra_reg shared_regs[0]; }; @@ -159,6 +163,7 @@ struct pci2phy_map { }; struct pci2phy_map *__find_pci2phy_map(int segment); +int uncore_pcibus_to_physid(struct pci_bus *bus); ssize_t uncore_event_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf); @@ -190,6 +195,13 @@ static inline bool uncore_pmc_freerunning(int idx) return idx == UNCORE_PMC_IDX_FREERUNNING; } +static inline +unsigned int uncore_mmio_box_ctl(struct intel_uncore_box *box) +{ + return box->pmu->type->box_ctl + + box->pmu->type->mmio_offset * box->pmu->pmu_idx; +} + static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) { return box->pmu->type->box_ctl; @@ -330,7 +342,7 @@ unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) static inline unsigned uncore_fixed_ctl(struct intel_uncore_box *box) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_fixed_ctl(box); else return uncore_msr_fixed_ctl(box); @@ -339,7 +351,7 @@ unsigned uncore_fixed_ctl(struct intel_uncore_box *box) static inline unsigned uncore_fixed_ctr(struct intel_uncore_box *box) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_fixed_ctr(box); else return uncore_msr_fixed_ctr(box); @@ -348,7 +360,7 @@ unsigned uncore_fixed_ctr(struct intel_uncore_box *box) static inline unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_event_ctl(box, idx); else return uncore_msr_event_ctl(box, idx); @@ -357,7 +369,7 @@ unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) static inline unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) { - if (box->pci_dev) + if (box->pci_dev || box->io_addr) return uncore_pci_perf_ctr(box, idx); else return uncore_msr_perf_ctr(box, idx); @@ -419,6 +431,16 @@ static inline bool is_freerunning_event(struct perf_event *event) (((cfg >> 8) & 0xff) >= 
UNCORE_FREERUNNING_UMASK_START); } +/* Check and reject invalid config */ +static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box, + struct perf_event *event) +{ + if (is_freerunning_event(event)) + return 0; + + return -EINVAL; +} + static inline void uncore_disable_box(struct intel_uncore_box *box) { if (box->pmu->type->ops->disable_box) @@ -467,7 +489,7 @@ static inline void uncore_box_exit(struct intel_uncore_box *box) static inline bool uncore_box_is_fake(struct intel_uncore_box *box) { - return (box->pkgid < 0); + return (box->dieid < 0); } static inline struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) @@ -482,6 +504,9 @@ static inline struct intel_uncore_box *uncore_event_to_box(struct perf_event *ev struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu); u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); +void uncore_mmio_exit_box(struct intel_uncore_box *box); +u64 uncore_mmio_read_counter(struct intel_uncore_box *box, + struct perf_event *event); void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); void uncore_pmu_event_start(struct perf_event *event, int flags); @@ -497,6 +522,7 @@ u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx); extern struct intel_uncore_type **uncore_msr_uncores; extern struct intel_uncore_type **uncore_pci_uncores; +extern struct intel_uncore_type **uncore_mmio_uncores; extern struct pci_driver *uncore_pci_driver; extern raw_spinlock_t pci2phy_map_lock; extern struct list_head pci2phy_map_head; @@ -528,6 +554,9 @@ int knl_uncore_pci_init(void); void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); +int snr_uncore_pci_init(void); +void snr_uncore_cpu_init(void); +void snr_uncore_mmio_init(void); /* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index f8431819b3e1..dbaa1b088a30 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -3,27 +3,29 @@ #include "uncore.h" /* Uncore IMC PCI IDs */ -#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 -#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 -#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 -#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 -#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 -#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 -#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 -#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c -#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 -#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 -#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f -#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f -#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c -#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 -#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 -#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f -#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f -#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc -#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 -#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 -#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC 0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 +#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define 
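The new uncore_freerunning_hw_config() above accepts an event only if it decodes as a free-running counter and rejects everything else; free-running counters are addressed purely through the config encoding (event field 0xff, umask at or above the type's start value, as in the event=0xff,umask=0x2N descriptors later in this patch) rather than through a programmable control register. A small sketch of that decoding, with the threshold value taken as an assumption:

/* Sketch: decode a free-running uncore event from attr->config. */
#include <stdio.h>
#include <stdbool.h>

#define FREERUNNING_UMASK_START 0x10    /* illustrative threshold */

static bool is_freerunning(unsigned long long cfg)
{
        unsigned int event = cfg & 0xff;
        unsigned int umask = (cfg >> 8) & 0xff;

        return event == 0xff && umask >= FREERUNNING_UMASK_START;
}

static int freerunning_hw_config(unsigned long long cfg)
{
        return is_freerunning(cfg) ? 0 : -1;    /* 0 == accepted, -1 == -EINVAL */
}

int main(void)
{
        /* "event=0xff,umask=0x20" corresponds to bw_in_port0 in the SNR tables below */
        printf("%d %d\n", freerunning_hw_config(0x20ff), freerunning_hw_config(0x0412));
        return 0;
}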
PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_KBL_HQ_IMC 0x5910 +#define PCI_DEVICE_ID_INTEL_KBL_WQ_IMC 0x5918 +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 #define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f #define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f #define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 @@ -34,9 +36,15 @@ #define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 #define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca #define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 +#define PCI_DEVICE_ID_INTEL_AML_YD_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_AML_YQ_IMC 0x590d +#define PCI_DEVICE_ID_INTEL_WHL_UQ_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC 0x3e34 +#define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35 #define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02 #define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12 + /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff #define SNB_UNC_CTL_UMASK_MASK 0x0000ff00 @@ -420,11 +428,6 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box) box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; } -static void snb_uncore_imc_exit_box(struct intel_uncore_box *box) -{ - iounmap(box->io_addr); -} - static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) {} @@ -437,13 +440,6 @@ static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct per static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) {} -static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); -} - /* * Keep the custom event_init() function compatible with old event * encoding for free running counters. 
@@ -570,13 +566,13 @@ static struct pmu snb_uncore_imc_pmu = { static struct intel_uncore_ops snb_uncore_imc_ops = { .init_box = snb_uncore_imc_init_box, - .exit_box = snb_uncore_imc_exit_box, + .exit_box = uncore_mmio_exit_box, .enable_box = snb_uncore_imc_enable_box, .disable_box = snb_uncore_imc_disable_box, .disable_event = snb_uncore_imc_disable_event, .enable_event = snb_uncore_imc_enable_event, .hw_config = snb_uncore_imc_hw_config, - .read_counter = snb_uncore_imc_read_counter, + .read_counter = uncore_mmio_read_counter, }; static struct intel_uncore_type snb_uncore_imc = { @@ -682,6 +678,14 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_HQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_WQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, @@ -737,6 +741,26 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_AML_YQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_4_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_WHL_UD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -807,6 +831,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(KBL_HQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core H Quad Core */ + IMC_DEV(KBL_WQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S 4 cores Work Station */ IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ @@ -821,6 +847,11 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ + IMC_DEV(AML_YD_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Dual Core */ + IMC_DEV(AML_YQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core Y Mobile Quad Core */ + IMC_DEV(WHL_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ + IMC_DEV(WHL_4_UQ_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U Mobile Quad Core */ + IMC_DEV(WHL_UD_IMC, 
&skl_uncore_pci_driver), /* 8th Gen Core U Mobile Dual Core */ IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */ { /* end marker */ } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b10e04387f38..b10a5ec79e48 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -324,12 +324,77 @@ #define SKX_M2M_PCI_PMON_CTR0 0x200 #define SKX_M2M_PCI_PMON_BOX_CTL 0x258 +/* SNR Ubox */ +#define SNR_U_MSR_PMON_CTR0 0x1f98 +#define SNR_U_MSR_PMON_CTL0 0x1f91 +#define SNR_U_MSR_PMON_UCLK_FIXED_CTL 0x1f93 +#define SNR_U_MSR_PMON_UCLK_FIXED_CTR 0x1f94 + +/* SNR CHA */ +#define SNR_CHA_RAW_EVENT_MASK_EXT 0x3ffffff +#define SNR_CHA_MSR_PMON_CTL0 0x1c01 +#define SNR_CHA_MSR_PMON_CTR0 0x1c08 +#define SNR_CHA_MSR_PMON_BOX_CTL 0x1c00 +#define SNR_C0_MSR_PMON_BOX_FILTER0 0x1c05 + + +/* SNR IIO */ +#define SNR_IIO_MSR_PMON_CTL0 0x1e08 +#define SNR_IIO_MSR_PMON_CTR0 0x1e01 +#define SNR_IIO_MSR_PMON_BOX_CTL 0x1e00 +#define SNR_IIO_MSR_OFFSET 0x10 +#define SNR_IIO_PMON_RAW_EVENT_MASK_EXT 0x7ffff + +/* SNR IRP */ +#define SNR_IRP0_MSR_PMON_CTL0 0x1ea8 +#define SNR_IRP0_MSR_PMON_CTR0 0x1ea1 +#define SNR_IRP0_MSR_PMON_BOX_CTL 0x1ea0 +#define SNR_IRP_MSR_OFFSET 0x10 + +/* SNR M2PCIE */ +#define SNR_M2PCIE_MSR_PMON_CTL0 0x1e58 +#define SNR_M2PCIE_MSR_PMON_CTR0 0x1e51 +#define SNR_M2PCIE_MSR_PMON_BOX_CTL 0x1e50 +#define SNR_M2PCIE_MSR_OFFSET 0x10 + +/* SNR PCU */ +#define SNR_PCU_MSR_PMON_CTL0 0x1ef1 +#define SNR_PCU_MSR_PMON_CTR0 0x1ef8 +#define SNR_PCU_MSR_PMON_BOX_CTL 0x1ef0 +#define SNR_PCU_MSR_PMON_BOX_FILTER 0x1efc + +/* SNR M2M */ +#define SNR_M2M_PCI_PMON_CTL0 0x468 +#define SNR_M2M_PCI_PMON_CTR0 0x440 +#define SNR_M2M_PCI_PMON_BOX_CTL 0x438 +#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff + +/* SNR PCIE3 */ +#define SNR_PCIE3_PCI_PMON_CTL0 0x508 +#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8 +#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e4 + +/* SNR IMC */ +#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54 +#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38 +#define SNR_IMC_MMIO_PMON_CTL0 0x40 +#define SNR_IMC_MMIO_PMON_CTR0 0x8 +#define SNR_IMC_MMIO_PMON_BOX_CTL 0x22800 +#define SNR_IMC_MMIO_OFFSET 0x4000 +#define SNR_IMC_MMIO_SIZE 0x4000 +#define SNR_IMC_MMIO_BASE_OFFSET 0xd0 +#define SNR_IMC_MMIO_BASE_MASK 0x1FFFFFFF +#define SNR_IMC_MMIO_MEM0_OFFSET 0xd8 +#define SNR_IMC_MMIO_MEM0_MASK 0x7FF + DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6"); DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21"); DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57"); +DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39"); DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); @@ -343,11 +408,14 @@ DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31"); DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43"); +DEFINE_UNCORE_FORMAT_ATTR(ch_mask2, ch_mask, "config:36-47"); DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46"); +DEFINE_UNCORE_FORMAT_ATTR(fc_mask2, 
fc_mask, "config:48-50"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5"); DEFINE_UNCORE_FORMAT_ATTR(filter_tid4, filter_tid, "config1:0-8"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid5, filter_tid, "config1:0-9"); DEFINE_UNCORE_FORMAT_ATTR(filter_cid, filter_cid, "config1:5"); DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8"); DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8"); @@ -1058,8 +1126,8 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve if (reg1->idx != EXTRA_REG_NONE) { int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; - int pkg = box->pkgid; - struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx]; + int die = box->dieid; + struct pci_dev *filter_pdev = uncore_extra_pci_dev[die].dev[idx]; if (filter_pdev) { pci_write_config_dword(filter_pdev, reg1->reg, @@ -3585,6 +3653,7 @@ static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = { static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = { .read_counter = uncore_msr_read_counter, + .hw_config = uncore_freerunning_hw_config, }; static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = { @@ -3967,3 +4036,535 @@ int skx_uncore_pci_init(void) } /* end of SKX uncore support */ + +/* SNR uncore support */ + +static struct intel_uncore_type snr_uncore_ubox = { + .name = "ubox", + .num_counters = 2, + .num_boxes = 1, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .perf_ctr = SNR_U_MSR_PMON_CTR0, + .event_ctl = SNR_U_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .fixed_ctr = SNR_U_MSR_PMON_UCLK_FIXED_CTR, + .fixed_ctl = SNR_U_MSR_PMON_UCLK_FIXED_CTL, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct attribute *snr_uncore_cha_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext2.attr, + &format_attr_edge.attr, + &format_attr_tid_en.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + &format_attr_filter_tid5.attr, + NULL, +}; +static const struct attribute_group snr_uncore_chabox_format_group = { + .name = "format", + .attrs = snr_uncore_cha_formats_attr, +}; + +static int snr_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + + reg1->reg = SNR_C0_MSR_PMON_BOX_FILTER0 + + box->pmu->type->msr_offset * box->pmu->pmu_idx; + reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID; + reg1->idx = 0; + + return 0; +} + +static void snr_cha_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + + if (reg1->idx != EXTRA_REG_NONE) + wrmsrl(reg1->reg, reg1->config); + + wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static struct intel_uncore_ops snr_uncore_chabox_ops = { + .init_box = ivbep_uncore_msr_init_box, + .disable_box = snbep_uncore_msr_disable_box, + .enable_box = snbep_uncore_msr_enable_box, + .disable_event = snbep_uncore_msr_disable_event, + .enable_event = snr_cha_enable_event, + .read_counter = uncore_msr_read_counter, + .hw_config = snr_cha_hw_config, +}; + +static struct intel_uncore_type snr_uncore_chabox = { + .name = "cha", + .num_counters = 4, + .num_boxes = 6, + .perf_ctr_bits = 48, + .event_ctl = SNR_CHA_MSR_PMON_CTL0, + .perf_ctr = 
SNR_CHA_MSR_PMON_CTR0, + .box_ctl = SNR_CHA_MSR_PMON_BOX_CTL, + .msr_offset = HSWEP_CBO_MSR_OFFSET, + .event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT, + .ops = &snr_uncore_chabox_ops, + .format_group = &snr_uncore_chabox_format_group, +}; + +static struct attribute *snr_uncore_iio_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh9.attr, + &format_attr_ch_mask2.attr, + &format_attr_fc_mask2.attr, + NULL, +}; + +static const struct attribute_group snr_uncore_iio_format_group = { + .name = "format", + .attrs = snr_uncore_iio_formats_attr, +}; + +static struct intel_uncore_type snr_uncore_iio = { + .name = "iio", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_IIO_MSR_PMON_CTL0, + .perf_ctr = SNR_IIO_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT, + .box_ctl = SNR_IIO_MSR_PMON_BOX_CTL, + .msr_offset = SNR_IIO_MSR_OFFSET, + .ops = &ivbep_uncore_msr_ops, + .format_group = &snr_uncore_iio_format_group, +}; + +static struct intel_uncore_type snr_uncore_irp = { + .name = "irp", + .num_counters = 2, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_IRP0_MSR_PMON_CTL0, + .perf_ctr = SNR_IRP0_MSR_PMON_CTR0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IRP0_MSR_PMON_BOX_CTL, + .msr_offset = SNR_IRP_MSR_OFFSET, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static struct intel_uncore_type snr_uncore_m2pcie = { + .name = "m2pcie", + .num_counters = 4, + .num_boxes = 5, + .perf_ctr_bits = 48, + .event_ctl = SNR_M2PCIE_MSR_PMON_CTL0, + .perf_ctr = SNR_M2PCIE_MSR_PMON_CTR0, + .box_ctl = SNR_M2PCIE_MSR_PMON_BOX_CTL, + .msr_offset = SNR_M2PCIE_MSR_OFFSET, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .ops = &ivbep_uncore_msr_ops, + .format_group = &ivbep_uncore_format_group, +}; + +static int snr_pcu_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + int ev_sel = hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK; + + if (ev_sel >= 0xb && ev_sel <= 0xe) { + reg1->reg = SNR_PCU_MSR_PMON_BOX_FILTER; + reg1->idx = ev_sel - 0xb; + reg1->config = event->attr.config1 & (0xff << reg1->idx); + } + return 0; +} + +static struct intel_uncore_ops snr_uncore_pcu_ops = { + IVBEP_UNCORE_MSR_OPS_COMMON_INIT(), + .hw_config = snr_pcu_hw_config, + .get_constraint = snbep_pcu_get_constraint, + .put_constraint = snbep_pcu_put_constraint, +}; + +static struct intel_uncore_type snr_uncore_pcu = { + .name = "pcu", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_PCU_MSR_PMON_CTR0, + .event_ctl = SNR_PCU_MSR_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_PCU_MSR_PMON_BOX_CTL, + .num_shared_regs = 1, + .ops = &snr_uncore_pcu_ops, + .format_group = &skx_uncore_pcu_format_group, +}; + +enum perf_uncore_snr_iio_freerunning_type_id { + SNR_IIO_MSR_IOCLK, + SNR_IIO_MSR_BW_IN, + + SNR_IIO_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snr_iio_freerunning[] = { + [SNR_IIO_MSR_IOCLK] = { 0x1eac, 0x1, 0x10, 1, 48 }, + [SNR_IIO_MSR_BW_IN] = { 0x1f00, 0x1, 0x10, 8, 48 }, +}; + +static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { + /* Free-Running IIO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO 
BANDWIDTH IN Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type snr_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 9, + .num_boxes = 5, + .num_freerunning_types = SNR_IIO_FREERUNNING_TYPE_MAX, + .freerunning = snr_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = snr_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *snr_msr_uncores[] = { + &snr_uncore_ubox, + &snr_uncore_chabox, + &snr_uncore_iio, + &snr_uncore_irp, + &snr_uncore_m2pcie, + &snr_uncore_pcu, + &snr_uncore_iio_free_running, + NULL, +}; + +void snr_uncore_cpu_init(void) +{ + uncore_msr_uncores = snr_msr_uncores; +} + +static void snr_m2m_uncore_pci_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + int box_ctl = uncore_pci_box_ctl(box); + + __set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags); + pci_write_config_dword(pdev, box_ctl, IVBEP_PMON_BOX_CTL_INT); +} + +static struct intel_uncore_ops snr_m2m_uncore_pci_ops = { + .init_box = snr_m2m_uncore_pci_init_box, + .disable_box = snbep_uncore_pci_disable_box, + .enable_box = snbep_uncore_pci_enable_box, + .disable_event = snbep_uncore_pci_disable_event, + .enable_event = snbep_uncore_pci_enable_event, + .read_counter = snbep_uncore_pci_read_counter, +}; + +static struct attribute *snr_m2m_uncore_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask_ext3.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_thresh8.attr, + NULL, +}; + +static const struct attribute_group snr_m2m_uncore_format_group = { + .name = "format", + .attrs = snr_m2m_uncore_formats_attr, +}; + +static struct intel_uncore_type snr_uncore_m2m = { + .name = "m2m", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_M2M_PCI_PMON_CTR0, + .event_ctl = SNR_M2M_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT, + .box_ctl = SNR_M2M_PCI_PMON_BOX_CTL, + .ops = &snr_m2m_uncore_pci_ops, + 
.format_group = &snr_m2m_uncore_format_group, +}; + +static struct intel_uncore_type snr_uncore_pcie3 = { + .name = "pcie3", + .num_counters = 4, + .num_boxes = 1, + .perf_ctr_bits = 48, + .perf_ctr = SNR_PCIE3_PCI_PMON_CTR0, + .event_ctl = SNR_PCIE3_PCI_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL, + .ops = &ivbep_uncore_pci_ops, + .format_group = &ivbep_uncore_format_group, +}; + +enum { + SNR_PCI_UNCORE_M2M, + SNR_PCI_UNCORE_PCIE3, +}; + +static struct intel_uncore_type *snr_pci_uncores[] = { + [SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m, + [SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3, + NULL, +}; + +static const struct pci_device_id snr_uncore_pci_ids[] = { + { /* M2M */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, SNR_PCI_UNCORE_M2M, 0), + }, + { /* PCIe3 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0), + }, + { /* end: all zeroes */ } +}; + +static struct pci_driver snr_uncore_pci_driver = { + .name = "snr_uncore", + .id_table = snr_uncore_pci_ids, +}; + +int snr_uncore_pci_init(void) +{ + /* SNR UBOX DID */ + int ret = snbep_pci2phy_map_init(0x3460, SKX_CPUNODEID, + SKX_GIDNIDMAP, true); + + if (ret) + return ret; + + uncore_pci_uncores = snr_pci_uncores; + uncore_pci_driver = &snr_uncore_pci_driver; + return 0; +} + +static struct pci_dev *snr_uncore_get_mc_dev(int id) +{ + struct pci_dev *mc_dev = NULL; + int phys_id, pkg; + + while (1) { + mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3451, mc_dev); + if (!mc_dev) + break; + phys_id = uncore_pcibus_to_physid(mc_dev->bus); + if (phys_id < 0) + continue; + pkg = topology_phys_to_logical_pkg(phys_id); + if (pkg < 0) + continue; + else if (pkg == id) + break; + } + return mc_dev; +} + +static void snr_uncore_mmio_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = snr_uncore_get_mc_dev(box->dieid); + unsigned int box_ctl = uncore_mmio_box_ctl(box); + resource_size_t addr; + u32 pci_dword; + + if (!pdev) + return; + + pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); + addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + + pci_read_config_dword(pdev, SNR_IMC_MMIO_MEM0_OFFSET, &pci_dword); + addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; + + addr += box_ctl; + + box->io_addr = ioremap(addr, SNR_IMC_MMIO_SIZE); + if (!box->io_addr) + return; + + writel(IVBEP_PMON_BOX_CTL_INT, box->io_addr); +} + +static void snr_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + u32 config; + + if (!box->io_addr) + return; + + config = readl(box->io_addr); + config |= SNBEP_PMON_BOX_CTL_FRZ; + writel(config, box->io_addr); +} + +static void snr_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + u32 config; + + if (!box->io_addr) + return; + + config = readl(box->io_addr); + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + writel(config, box->io_addr); +} + +static void snr_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(hwc->config | SNBEP_PMON_CTL_EN, + box->io_addr + hwc->config_base); +} + +static void snr_uncore_mmio_disable_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (!box->io_addr) + return; + + writel(hwc->config, box->io_addr + hwc->config_base); +} + +static struct intel_uncore_ops snr_uncore_mmio_ops = { + .init_box = snr_uncore_mmio_init_box, + 
.exit_box = uncore_mmio_exit_box, + .disable_box = snr_uncore_mmio_disable_box, + .enable_box = snr_uncore_mmio_enable_box, + .disable_event = snr_uncore_mmio_disable_event, + .enable_event = snr_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +static struct uncore_event_desc snr_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x00,umask=0x00"), + INTEL_UNCORE_EVENT_DESC(cas_count_read, "event=0x04,umask=0x0f"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(cas_count_write.unit, "MiB"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_type snr_uncore_imc = { + .name = "imc", + .num_counters = 4, + .num_boxes = 2, + .perf_ctr_bits = 48, + .fixed_ctr_bits = 48, + .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, + .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, + .event_descs = snr_uncore_imc_events, + .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, + .event_ctl = SNR_IMC_MMIO_PMON_CTL0, + .event_mask = SNBEP_PMON_RAW_EVENT_MASK, + .box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL, + .mmio_offset = SNR_IMC_MMIO_OFFSET, + .ops = &snr_uncore_mmio_ops, + .format_group = &skx_uncore_format_group, +}; + +enum perf_uncore_snr_imc_freerunning_type_id { + SNR_IMC_DCLK, + SNR_IMC_DDR, + + SNR_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snr_imc_freerunning[] = { + [SNR_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 }, + [SNR_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 }, +}; + +static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = { + INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"), + + INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"), +}; + +static struct intel_uncore_ops snr_uncore_imc_freerunning_ops = { + .init_box = snr_uncore_mmio_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type snr_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 1, + .num_freerunning_types = SNR_IMC_FREERUNNING_TYPE_MAX, + .freerunning = snr_imc_freerunning, + .ops = &snr_uncore_imc_freerunning_ops, + .event_descs = snr_uncore_imc_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + +static struct intel_uncore_type *snr_mmio_uncores[] = { + &snr_uncore_imc, + &snr_uncore_imc_free_running, + NULL, +}; + +void snr_uncore_mmio_init(void) +{ + uncore_mmio_uncores = snr_mmio_uncores; +} + +/* end of SNR uncore support */ diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index f3f4c2263501..9431447541e9 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/perf_event.h> +#include <linux/sysfs.h> #include <linux/nospec.h> #include <asm/intel-family.h> +#include "probe.h" enum perf_msr_id { PERF_MSR_TSC = 0, @@ -12,32 +14,30 @@ enum perf_msr_id { PERF_MSR_PTSC = 5, PERF_MSR_IRPERF = 6, PERF_MSR_THERM = 7, - PERF_MSR_THERM_SNAP = 8, - PERF_MSR_THERM_UNIT = 9, PERF_MSR_EVENT_MAX, }; -static bool 
test_aperfmperf(int idx) +static bool test_aperfmperf(int idx, void *data) { return boot_cpu_has(X86_FEATURE_APERFMPERF); } -static bool test_ptsc(int idx) +static bool test_ptsc(int idx, void *data) { return boot_cpu_has(X86_FEATURE_PTSC); } -static bool test_irperf(int idx) +static bool test_irperf(int idx, void *data) { return boot_cpu_has(X86_FEATURE_IRPERF); } -static bool test_therm_status(int idx) +static bool test_therm_status(int idx, void *data) { return boot_cpu_has(X86_FEATURE_DTHERM); } -static bool test_intel(int idx) +static bool test_intel(int idx, void *data) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) @@ -98,37 +98,51 @@ static bool test_intel(int idx) return false; } -struct perf_msr { - u64 msr; - struct perf_pmu_events_attr *attr; - bool (*test)(int idx); +PMU_EVENT_ATTR_STRING(tsc, attr_tsc, "event=0x00" ); +PMU_EVENT_ATTR_STRING(aperf, attr_aperf, "event=0x01" ); +PMU_EVENT_ATTR_STRING(mperf, attr_mperf, "event=0x02" ); +PMU_EVENT_ATTR_STRING(pperf, attr_pperf, "event=0x03" ); +PMU_EVENT_ATTR_STRING(smi, attr_smi, "event=0x04" ); +PMU_EVENT_ATTR_STRING(ptsc, attr_ptsc, "event=0x05" ); +PMU_EVENT_ATTR_STRING(irperf, attr_irperf, "event=0x06" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin, attr_therm, "event=0x07" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, attr_therm_snap, "1" ); +PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, attr_therm_unit, "C" ); + +static unsigned long msr_mask; + +PMU_EVENT_GROUP(events, aperf); +PMU_EVENT_GROUP(events, mperf); +PMU_EVENT_GROUP(events, pperf); +PMU_EVENT_GROUP(events, smi); +PMU_EVENT_GROUP(events, ptsc); +PMU_EVENT_GROUP(events, irperf); + +static struct attribute *attrs_therm[] = { + &attr_therm.attr.attr, + &attr_therm_snap.attr.attr, + &attr_therm_unit.attr.attr, + NULL, }; -PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00" ); -PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01" ); -PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02" ); -PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03" ); -PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04" ); -PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05" ); -PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06" ); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin, evattr_therm, "event=0x07" ); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin.snapshot, evattr_therm_snap, "1" ); -PMU_EVENT_ATTR_STRING(cpu_thermal_margin.unit, evattr_therm_unit, "C" ); +static struct attribute_group group_therm = { + .name = "events", + .attrs = attrs_therm, +}; static struct perf_msr msr[] = { - [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, - [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, - [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, - [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, - [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, - [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, - [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, - [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &evattr_therm, test_therm_status, }, - [PERF_MSR_THERM_SNAP] = { MSR_IA32_THERM_STATUS, &evattr_therm_snap, test_therm_status, }, - [PERF_MSR_THERM_UNIT] = { MSR_IA32_THERM_STATUS, &evattr_therm_unit, test_therm_status, }, + [PERF_MSR_TSC] = { .no_check = true, }, + [PERF_MSR_APERF] = { MSR_IA32_APERF, &group_aperf, test_aperfmperf, }, + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &group_mperf, test_aperfmperf, }, + [PERF_MSR_PPERF] = { MSR_PPERF, &group_pperf, test_intel, 
}, + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &group_smi, test_intel, }, + [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &group_ptsc, test_ptsc, }, + [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &group_irperf, test_irperf, }, + [PERF_MSR_THERM] = { MSR_IA32_THERM_STATUS, &group_therm, test_therm_status, }, }; -static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { +static struct attribute *events_attrs[] = { + &attr_tsc.attr.attr, NULL, }; @@ -153,6 +167,17 @@ static const struct attribute_group *attr_groups[] = { NULL, }; +const struct attribute_group *attr_update[] = { + &group_aperf, + &group_mperf, + &group_pperf, + &group_smi, + &group_ptsc, + &group_irperf, + &group_therm, + NULL, +}; + static int msr_event_init(struct perf_event *event) { u64 cfg = event->attr.config; @@ -169,7 +194,7 @@ static int msr_event_init(struct perf_event *event) cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); - if (!msr[cfg].attr) + if (!(msr_mask & (1 << cfg))) return -EINVAL; event->hw.idx = -1; @@ -252,32 +277,17 @@ static struct pmu pmu_msr = { .stop = msr_event_stop, .read = msr_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, + .attr_update = attr_update, }; static int __init msr_init(void) { - int i, j = 0; - if (!boot_cpu_has(X86_FEATURE_TSC)) { pr_cont("no MSR PMU driver.\n"); return 0; } - /* Probe the MSRs. */ - for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) { - u64 val; - - /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ - if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) - msr[i].attr = NULL; - } - - /* List remaining MSRs in the sysfs attrs. */ - for (i = 0; i < PERF_MSR_EVENT_MAX; i++) { - if (msr[i].attr) - events_attrs[j++] = &msr[i].attr->attr.attr; - } - events_attrs[j] = NULL; + msr_mask = perf_msr_probe(msr, PERF_MSR_EVENT_MAX, true, NULL); perf_pmu_register(&pmu_msr, "msr", -1); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index a6ac2f4f76fc..8751008fc170 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -121,24 +121,6 @@ struct amd_nb { (1ULL << PERF_REG_X86_R14) | \ (1ULL << PERF_REG_X86_R15)) -#define PEBS_XMM_REGS \ - ((1ULL << PERF_REG_X86_XMM0) | \ - (1ULL << PERF_REG_X86_XMM1) | \ - (1ULL << PERF_REG_X86_XMM2) | \ - (1ULL << PERF_REG_X86_XMM3) | \ - (1ULL << PERF_REG_X86_XMM4) | \ - (1ULL << PERF_REG_X86_XMM5) | \ - (1ULL << PERF_REG_X86_XMM6) | \ - (1ULL << PERF_REG_X86_XMM7) | \ - (1ULL << PERF_REG_X86_XMM8) | \ - (1ULL << PERF_REG_X86_XMM9) | \ - (1ULL << PERF_REG_X86_XMM10) | \ - (1ULL << PERF_REG_X86_XMM11) | \ - (1ULL << PERF_REG_X86_XMM12) | \ - (1ULL << PERF_REG_X86_XMM13) | \ - (1ULL << PERF_REG_X86_XMM14) | \ - (1ULL << PERF_REG_X86_XMM15)) - /* * Per register state. 
*/ @@ -631,14 +613,11 @@ struct x86_pmu { int attr_rdpmc_broken; int attr_rdpmc; struct attribute **format_attrs; - struct attribute **event_attrs; - struct attribute **caps_attrs; ssize_t (*events_sysfs_show)(char *page, u64 config); - struct attribute **cpu_events; + const struct attribute_group **attr_update; unsigned long attr_freeze_on_smi; - struct attribute **attrs; /* * CPU Hotplug hooks @@ -668,8 +647,7 @@ struct x86_pmu { pebs_broken :1, pebs_prec_dist :1, pebs_no_tlb :1, - pebs_no_isolation :1, - pebs_no_xmm_regs :1; + pebs_no_isolation :1; int pebs_record_size; int pebs_buffer_size; int max_pebs_events; @@ -905,8 +883,6 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip) ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event); ssize_t intel_event_sysfs_show(char *page, u64 config); -struct attribute **merge_attr(struct attribute **a, struct attribute **b); - ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c new file mode 100644 index 000000000000..c2ede2f3b277 --- /dev/null +++ b/arch/x86/events/probe.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/export.h> +#include <linux/types.h> +#include <linux/bits.h> +#include "probe.h" + +static umode_t +not_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return 0; +} + +unsigned long +perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) +{ + unsigned long avail = 0; + unsigned int bit; + u64 val; + + if (cnt >= BITS_PER_LONG) + return 0; + + for (bit = 0; bit < cnt; bit++) { + if (!msr[bit].no_check) { + struct attribute_group *grp = msr[bit].grp; + + grp->is_visible = not_visible; + + if (msr[bit].test && !msr[bit].test(bit, data)) + continue; + /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ + if (rdmsrl_safe(msr[bit].msr, &val)) + continue; + /* Disable zero counters if requested. 
*/ + if (!zero && !val) + continue; + + grp->is_visible = NULL; + } + avail |= BIT(bit); + } + + return avail; +} +EXPORT_SYMBOL_GPL(perf_msr_probe); diff --git a/arch/x86/events/probe.h b/arch/x86/events/probe.h new file mode 100644 index 000000000000..4c8e0afc5fb5 --- /dev/null +++ b/arch/x86/events/probe.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_X86_EVENTS_PROBE_H__ +#define __ARCH_X86_EVENTS_PROBE_H__ +#include <linux/sysfs.h> + +struct perf_msr { + u64 msr; + struct attribute_group *grp; + bool (*test)(int idx, void *data); + bool no_check; +}; + +unsigned long +perf_msr_probe(struct perf_msr *msr, int cnt, bool no_zero, void *data); + +#define __PMU_EVENT_GROUP(_name) \ +static struct attribute *attrs_##_name[] = { \ + &attr_##_name.attr.attr, \ + NULL, \ +} + +#define PMU_EVENT_GROUP(_grp, _name) \ +__PMU_EVENT_GROUP(_name); \ +static struct attribute_group group_##_name = { \ + .name = #_grp, \ + .attrs = attrs_##_name, \ +} + +#endif /* __ARCH_X86_EVENTS_PROBE_H__ */ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 1608050e9df9..0e033ef11a9f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -17,64 +17,13 @@ #include <linux/version.h> #include <linux/vmalloc.h> #include <linux/mm.h> -#include <linux/clockchips.h> #include <linux/hyperv.h> #include <linux/slab.h> #include <linux/cpuhotplug.h> - -#ifdef CONFIG_HYPERV_TSCPAGE - -static struct ms_hyperv_tsc_page *tsc_pg; - -struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return tsc_pg; -} -EXPORT_SYMBOL_GPL(hv_get_tsc_page); - -static u64 read_hv_clock_tsc(struct clocksource *arg) -{ - u64 current_tick = hv_read_tsc_page(tsc_pg); - - if (current_tick == U64_MAX) - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - - return current_tick; -} - -static struct clocksource hyperv_cs_tsc = { - .name = "hyperv_clocksource_tsc_page", - .rating = 400, - .read = read_hv_clock_tsc, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; -#endif - -static u64 read_hv_clock_msr(struct clocksource *arg) -{ - u64 current_tick; - /* - * Read the partition counter to get the current tick count. This count - * is set to 0 when the partition is created and is incremented in - * 100 nanosecond units. - */ - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - return current_tick; -} - -static struct clocksource hyperv_cs_msr = { - .name = "hyperv_clocksource_msr", - .rating = 400, - .read = read_hv_clock_msr, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; +#include <clocksource/hyperv_timer.h> void *hv_hypercall_pg; EXPORT_SYMBOL_GPL(hv_hypercall_pg); -struct clocksource *hyperv_cs; -EXPORT_SYMBOL_GPL(hyperv_cs); u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); @@ -343,42 +292,8 @@ void __init hyperv_init(void) x86_init.pci.arch_init = hv_pci_init; - /* - * Register Hyper-V specific clocksource. 
- */ -#ifdef CONFIG_HYPERV_TSCPAGE - if (ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE) { - union hv_x64_msr_hypercall_contents tsc_msr; - - tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); - if (!tsc_pg) - goto register_msr_cs; - - hyperv_cs = &hyperv_cs_tsc; - - rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - tsc_msr.enable = 1; - tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); - - wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK; - - clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); - return; - } -register_msr_cs: -#endif - /* - * For 32 bit guests just use the MSR based mechanism for reading - * the partition counter. - */ - - hyperv_cs = &hyperv_cs_msr; - if (ms_hyperv.features & HV_MSR_TIME_REF_COUNT_AVAILABLE) - clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); - + /* Register Hyper-V specific clocksource */ + hv_init_clocksource(); return; remove_cpuhp_state: diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 629d1ee05599..1cee10091b9f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -358,7 +358,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); /* Create the ucontext. */ - if (boot_cpu_has(X86_FEATURE_XSAVE)) + if (static_cpu_has(X86_FEATURE_XSAVE)) put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); else put_user_ex(0, &frame->uc.uc_flags); diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index a43212036257..64a6c952091e 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -237,6 +237,14 @@ COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, unsigned long, tls_val, int __user *, child_tidptr) { - return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, - tls_val); + struct kernel_clone_args args = { + .flags = (clone_flags & ~CSIGNAL), + .child_tid = child_tidptr, + .parent_tid = parent_tidptr, + .exit_signal = (clone_flags & CSIGNAL), + .stack = newsp, + .tls = tls_val, + }; + + return _do_fork(&args); } diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h new file mode 100644 index 000000000000..4adb13f08af7 --- /dev/null +++ b/arch/x86/include/asm/acrn.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ACRN_H +#define _ASM_X86_ACRN_H + +extern void acrn_hv_callback_vector(void); +#ifdef CONFIG_TRACING +#define trace_acrn_hv_callback_vector acrn_hv_callback_vector +#endif + +extern void acrn_hv_vector_handler(struct pt_regs *regs); +#endif /* _ASM_X86_ACRN_H */ diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1340fa53b575..050e5f9ebf81 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -53,7 +53,7 @@ extern unsigned int apic_verbosity; extern int local_apic_timer_c2_ok; extern int disable_apic; -extern unsigned int lapic_timer_frequency; +extern unsigned int lapic_timer_period; extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { @@ -155,7 +155,6 @@ static inline int apic_force_enable(unsigned long addr) extern int apic_force_enable(unsigned long addr); #endif -extern void apic_bsp_setup(bool upmode); extern void apic_ap_setup(void); /* @@ -175,6 +174,7 @@ extern void lapic_assign_system_vectors(void); extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); extern void lapic_online(void); extern void lapic_offline(void); 
+extern bool apic_needs_pit(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -188,6 +188,7 @@ static inline void init_bsp_APIC(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } +static inline bool apic_needs_pit(void) { return true; } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index ea3d95275b43..115127c7ad28 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -54,7 +54,7 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } /** @@ -68,7 +68,7 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) - : "ir" (i)); + : "ir" (i) : "memory"); } /** @@ -95,7 +95,7 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" - : "+m" (v->counter)); + : "+m" (v->counter) :: "memory"); } #define arch_atomic_inc arch_atomic_inc @@ -108,7 +108,7 @@ static __always_inline void arch_atomic_inc(atomic_t *v) static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" - : "+m" (v->counter)); + : "+m" (v->counter) :: "memory"); } #define arch_atomic_dec arch_atomic_dec diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 6a5b0ec460da..52cfaecb13f9 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -9,7 +9,7 @@ /* An 64bit atomic type */ typedef struct { - u64 __aligned(8) counter; + s64 __aligned(8) counter; } atomic64_t; #define ATOMIC64_INIT(val) { (val) } @@ -71,8 +71,7 @@ ATOMIC64_DECL(add_unless); * the old value. */ -static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, - long long n) +static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) { return arch_cmpxchg64(&v->counter, o, n); } @@ -85,9 +84,9 @@ static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, * Atomically xchgs the value of @v to @n and returns * the old value. */ -static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) +static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n) { - long long o; + s64 o; unsigned high = (unsigned)(n >> 32); unsigned low = (unsigned)n; alternative_atomic64(xchg, "=&A" (o), @@ -103,7 +102,7 @@ static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) * * Atomically sets the value of @v to @n. */ -static inline void arch_atomic64_set(atomic64_t *v, long long i) +static inline void arch_atomic64_set(atomic64_t *v, s64 i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; @@ -118,9 +117,9 @@ static inline void arch_atomic64_set(atomic64_t *v, long long i) * * Atomically reads the value of @v and returns it. 
*/ -static inline long long arch_atomic64_read(const atomic64_t *v) +static inline s64 arch_atomic64_read(const atomic64_t *v) { - long long r; + s64 r; alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); return r; } @@ -132,7 +131,7 @@ static inline long long arch_atomic64_read(const atomic64_t *v) * * Atomically adds @i to @v and returns @i + *@v */ -static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) +static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { alternative_atomic64(add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -143,7 +142,7 @@ static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) /* * Other variants with different arithmetic operators: */ -static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) +static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { alternative_atomic64(sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -151,18 +150,18 @@ static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) return i; } -static inline long long arch_atomic64_inc_return(atomic64_t *v) +static inline s64 arch_atomic64_inc_return(atomic64_t *v) { - long long a; + s64 a; alternative_atomic64(inc_return, "=&A" (a), "S" (v) : "memory", "ecx"); return a; } #define arch_atomic64_inc_return arch_atomic64_inc_return -static inline long long arch_atomic64_dec_return(atomic64_t *v) +static inline s64 arch_atomic64_dec_return(atomic64_t *v) { - long long a; + s64 a; alternative_atomic64(dec_return, "=&A" (a), "S" (v) : "memory", "ecx"); return a; @@ -176,7 +175,7 @@ static inline long long arch_atomic64_dec_return(atomic64_t *v) * * Atomically adds @i to @v. */ -static inline long long arch_atomic64_add(long long i, atomic64_t *v) +static inline s64 arch_atomic64_add(s64 i, atomic64_t *v) { __alternative_atomic64(add, add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -191,7 +190,7 @@ static inline long long arch_atomic64_add(long long i, atomic64_t *v) * * Atomically subtracts @i from @v. */ -static inline long long arch_atomic64_sub(long long i, atomic64_t *v) +static inline s64 arch_atomic64_sub(s64 i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -234,8 +233,7 @@ static inline void arch_atomic64_dec(atomic64_t *v) * Atomically adds @a to @v, so long as it was not @u. * Returns non-zero if the add was done, zero otherwise. 
*/ -static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, - long long u) +static inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); @@ -254,9 +252,9 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v) } #define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero -static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) +static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) { - long long r; + s64 r; alternative_atomic64(dec_if_positive, "=&A" (r), "S" (v) : "ecx", "memory"); return r; @@ -266,17 +264,17 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 -static inline void arch_atomic64_and(long long i, atomic64_t *v) +static inline void arch_atomic64_and(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; } -static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; @@ -284,17 +282,17 @@ static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) return old; } -static inline void arch_atomic64_or(long long i, atomic64_t *v) +static inline void arch_atomic64_or(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; } -static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; @@ -302,17 +300,17 @@ static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) return old; } -static inline void arch_atomic64_xor(long long i, atomic64_t *v) +static inline void arch_atomic64_xor(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; } -static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; @@ -320,9 +318,9 @@ static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) return old; } -static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { - long long old, c = 0; + s64 old, c = 0; while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) c = old; diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index dadc20adba21..95c6ceac66b9 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -17,7 +17,7 @@ * Atomically reads the value of @v. * Doesn't imply a read memory barrier. */ -static inline long arch_atomic64_read(const atomic64_t *v) +static inline s64 arch_atomic64_read(const atomic64_t *v) { return READ_ONCE((v)->counter); } @@ -29,7 +29,7 @@ static inline long arch_atomic64_read(const atomic64_t *v) * * Atomically sets the value of @v to @i. 
*/ -static inline void arch_atomic64_set(atomic64_t *v, long i) +static inline void arch_atomic64_set(atomic64_t *v, s64 i) { WRITE_ONCE(v->counter, i); } @@ -41,11 +41,11 @@ static inline void arch_atomic64_set(atomic64_t *v, long i) * * Atomically adds @i to @v. */ -static __always_inline void arch_atomic64_add(long i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } /** @@ -55,11 +55,11 @@ static __always_inline void arch_atomic64_add(long i, atomic64_t *v) * * Atomically subtracts @i from @v. */ -static inline void arch_atomic64_sub(long i, atomic64_t *v) +static inline void arch_atomic64_sub(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) - : "er" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter) : "memory"); } /** @@ -71,7 +71,7 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i); } @@ -87,7 +87,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } #define arch_atomic64_inc arch_atomic64_inc @@ -101,7 +101,7 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) - : "m" (v->counter)); + : "m" (v->counter) : "memory"); } #define arch_atomic64_dec arch_atomic64_dec @@ -142,7 +142,7 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. 
*/ -static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) +static inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v) { return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i); } @@ -155,43 +155,43 @@ static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) * * Atomically adds @i to @v and returns @i + @v */ -static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return i + xadd(&v->counter, i); } -static inline long arch_atomic64_sub_return(long i, atomic64_t *v) +static inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v) { return arch_atomic64_add_return(-i, v); } -static inline long arch_atomic64_fetch_add(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return xadd(&v->counter, i); } -static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v) { return xadd(&v->counter, -i); } -static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new) +static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return arch_cmpxchg(&v->counter, old, new); } #define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg -static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { return try_cmpxchg(&v->counter, old, new); } -static inline long arch_atomic64_xchg(atomic64_t *v, long new) +static inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); } -static inline void arch_atomic64_and(long i, atomic64_t *v) +static inline void arch_atomic64_and(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "andq %1,%0" : "+m" (v->counter) @@ -199,7 +199,7 @@ static inline void arch_atomic64_and(long i, atomic64_t *v) : "memory"); } -static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -208,7 +208,7 @@ static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) return val; } -static inline void arch_atomic64_or(long i, atomic64_t *v) +static inline void arch_atomic64_or(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "orq %1,%0" : "+m" (v->counter) @@ -216,7 +216,7 @@ static inline void arch_atomic64_or(long i, atomic64_t *v) : "memory"); } -static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); @@ -225,7 +225,7 @@ static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) return val; } -static inline void arch_atomic64_xor(long i, atomic64_t *v) +static inline void arch_atomic64_xor(s64 i, atomic64_t *v) { asm volatile(LOCK_PREFIX "xorq %1,%0" : "+m" (v->counter) @@ -233,7 +233,7 @@ static inline void arch_atomic64_xor(long i, atomic64_t *v) : "memory"); } -static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) +static inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 val = arch_atomic64_read(v); diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 14de0432d288..84f848c2541a 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -80,8 +80,8 @@ do { \ }) /* Atomic operations are already serializing on x86 */ -#define __smp_mb__before_atomic() barrier() 
-#define __smp_mb__after_atomic() barrier() +#define __smp_mb__before_atomic() do { } while (0) +#define __smp_mb__after_atomic() do { } while (0) #include <asm-generic/barrier.h> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 8e790ec219a5..ba15d53c1ca7 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -49,23 +49,8 @@ #define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK(nr) (1 << ((nr) & 7)) -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * - * Note: there are no guarantees that this function will not be reordered - * on non x86 architectures, so if you are writing portable code, - * make sure not to rely on its reordering guarantees. - * - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ static __always_inline void -set_bit(long nr, volatile unsigned long *addr) +arch_set_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "orb %1,%0" @@ -78,32 +63,14 @@ set_bit(long nr, volatile unsigned long *addr) } } -/** - * __set_bit - Set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * Unlike set_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void __set_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___set_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() - * in order to ensure changes are visible on other processors. - */ static __always_inline void -clear_bit(long nr, volatile unsigned long *addr) +arch_clear_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "andb %1,%0" @@ -115,26 +82,21 @@ clear_bit(long nr, volatile unsigned long *addr) } } -/* - * clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and implies release semantics before the memory - * operation. It can be used for an unlock. 
- */ -static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void +arch_clear_bit_unlock(long nr, volatile unsigned long *addr) { barrier(); - clear_bit(nr, addr); + arch_clear_bit(nr, addr); } -static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___clear_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) { bool negative; asm volatile(LOCK_PREFIX "andb %2,%1" @@ -143,48 +105,23 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile : "ir" ((char) ~(1 << nr)) : "memory"); return negative; } +#define arch_clear_bit_unlock_is_negative_byte \ + arch_clear_bit_unlock_is_negative_byte -// Let everybody know we have it -#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte - -/* - * __clear_bit_unlock - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * __clear_bit() is non-atomic and implies release semantics before the memory - * operation. It can be used for an unlock if no other CPUs can concurrently - * modify other bits in the word. - */ -static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr) +static __always_inline void +arch___clear_bit_unlock(long nr, volatile unsigned long *addr) { - __clear_bit(nr, addr); + arch___clear_bit(nr, addr); } -/** - * __change_bit - Toggle a bit in memory - * @nr: the bit to change - * @addr: the address to start counting from - * - * Unlike change_bit(), this function is non-atomic and may be reordered. - * If it's called on the same region of memory simultaneously, the effect - * may be that only one operation succeeds. - */ -static __always_inline void __change_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch___change_bit(long nr, volatile unsigned long *addr) { asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory"); } -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to change - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static __always_inline void change_bit(long nr, volatile unsigned long *addr) +static __always_inline void +arch_change_bit(long nr, volatile unsigned long *addr) { if (IS_IMMEDIATE(nr)) { asm volatile(LOCK_PREFIX "xorb %1,%0" @@ -196,42 +133,20 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) } } -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_set_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr); } -/** - * test_and_set_bit_lock - Set a bit and return its old value for lock - * @nr: Bit to set - * @addr: Address to count from - * - * This is the same as test_and_set_bit on x86. 
- */ static __always_inline bool -test_and_set_bit_lock(long nr, volatile unsigned long *addr) +arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr) { - return test_and_set_bit(nr, addr); + return arch_test_and_set_bit(nr, addr); } -/** - * __test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - */ -static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_set_bit(long nr, volatile unsigned long *addr) { bool oldbit; @@ -242,28 +157,13 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * return oldbit; } -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_clear_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr); } -/** - * __test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is non-atomic and can be reordered. - * If two examples of this operation race, one can appear to succeed - * but actually fail. You must protect multiple accesses with a lock. - * +/* * Note: the operation is performed atomically with respect to * the local CPU, but not other CPUs. Portable code should not * rely on this behaviour. @@ -271,7 +171,8 @@ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long * * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_clear_bit(long nr, volatile unsigned long *addr) { bool oldbit; @@ -282,8 +183,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long return oldbit; } -/* WARNING: non atomic and it can be reordered! */ -static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch___test_and_change_bit(long nr, volatile unsigned long *addr) { bool oldbit; @@ -295,15 +196,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon return oldbit; } -/** - * test_and_change_bit - Change a bit and return its old value - * @nr: Bit to change - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. 
- */ -static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool +arch_test_and_change_bit(long nr, volatile unsigned long *addr) { return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } @@ -326,16 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l return oldbit; } -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ -/** - * test_bit - Determine whether a bit is set - * @nr: bit number to test - * @addr: Address to start counting from - */ -static bool test_bit(int nr, const volatile unsigned long *addr); -#endif - -#define test_bit(nr, addr) \ +#define arch_test_bit(nr, addr) \ (__builtin_constant_p((nr)) \ ? constant_test_bit((nr), (addr)) \ : variable_test_bit((nr), (addr))) @@ -504,6 +389,8 @@ static __always_inline int fls64(__u64 x) #include <asm-generic/bitops/const_hweight.h> +#include <asm-generic/bitops-instrumented.h> + #include <asm-generic/bitops/le.h> #include <asm-generic/bitops/ext2-atomic-setbit.h> diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index f6f6ef436599..101eb944f13c 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -24,7 +24,7 @@ static void sanitize_boot_params(struct boot_params *boot_params) * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear * this field. The purpose of this field is to guarantee * compliance with the x86 boot spec located in - * Documentation/x86/boot.txt . That spec says that the + * Documentation/x86/boot.rst . That spec says that the * *whole* structure should be cleared, after which only the * portion defined by struct setup_header (boot_params->hdr) * should be copied in. 
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 1d337c51f7e6..58acda503817 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -22,8 +22,8 @@ enum cpuid_leafs CPUID_LNX_3, CPUID_7_0_EBX, CPUID_D_1_EAX, - CPUID_F_0_EDX, - CPUID_F_1_EDX, + CPUID_LNX_4, + CPUID_7_1_EAX, CPUID_8000_0008_EBX, CPUID_6_EAX, CPUID_8000_000A_EDX, diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 75f27ee2c263..998c2cc08363 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -239,12 +239,14 @@ #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ #define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ #define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ #define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ #define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ #define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ #define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ #define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ #define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ @@ -269,13 +271,19 @@ #define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ #define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */ -#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */ +/* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0xf, etc. + * + * Reuse free bits when adding new feature flags! 
+ */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */ -#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ +/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -322,6 +330,7 @@ #define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ #define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ #define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ #define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 7e42b285c856..c6136d79f8c0 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -47,7 +47,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); -void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 5cbce6fbb534..296b346184b2 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -22,6 +22,35 @@ pop %_ASM_BP .endm +#ifdef CONFIG_X86_64 +/* + * This is a sneaky trick to help the unwinder find pt_regs on the stack. The + * frame pointer is replaced with an encoded pointer to pt_regs. The encoding + * is just setting the LSB, which makes it an invalid stack address and is also + * a signal to the unwinder that it's a pt_regs pointer in disguise. + * + * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts + * the original rbp. + */ +.macro ENCODE_FRAME_POINTER ptregs_offset=0 + leaq 1+\ptregs_offset(%rsp), %rbp +.endm +#else /* !CONFIG_X86_64 */ +/* + * This is a sneaky trick to help the unwinder find pt_regs on the stack. The + * frame pointer is replaced with an encoded pointer to pt_regs. The encoding + * is just clearing the MSB, which makes it an invalid stack address and is also + * a signal to the unwinder that it's a pt_regs pointer in disguise. + * + * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the + * original ebp. 
+ */ +.macro ENCODE_FRAME_POINTER + mov %esp, %ebp + andl $0x7fffffff, %ebp +.endm +#endif /* CONFIG_X86_64 */ + #else /* !__ASSEMBLY__ */ #define FRAME_BEGIN \ @@ -30,12 +59,32 @@ #define FRAME_END "pop %" _ASM_BP "\n" +#ifdef CONFIG_X86_64 +#define ENCODE_FRAME_POINTER \ + "lea 1(%rsp), %rbp\n\t" +#else /* !CONFIG_X86_64 */ +#define ENCODE_FRAME_POINTER \ + "movl %esp, %ebp\n\t" \ + "andl $0x7fffffff, %ebp\n\t" +#endif /* CONFIG_X86_64 */ + #endif /* __ASSEMBLY__ */ #define FRAME_OFFSET __ASM_SEL(4, 8) #else /* !CONFIG_FRAME_POINTER */ +#ifdef __ASSEMBLY__ + +.macro ENCODE_FRAME_POINTER ptregs_offset=0 +.endm + +#else /* !__ASSEMBLY */ + +#define ENCODE_FRAME_POINTER + +#endif + #define FRAME_BEGIN #define FRAME_END #define FRAME_OFFSET 0 diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index d9069bb26c7f..07533795b8d2 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -37,7 +37,7 @@ typedef struct { #ifdef CONFIG_X86_MCE_AMD unsigned int irq_deferred_error_count; #endif -#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) +#ifdef CONFIG_X86_HV_CALLBACK_VECTOR unsigned int irq_hv_callback_count; #endif #if IS_ENABLED(CONFIG_HYPERV) diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 67385d56d4f4..6352dee37cda 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -75,16 +75,15 @@ extern unsigned int hpet_readl(unsigned int a); extern void force_hpet_resume(void); struct irq_data; -struct hpet_dev; +struct hpet_channel; struct irq_domain; extern void hpet_msi_unmask(struct irq_data *data); extern void hpet_msi_mask(struct irq_data *data); -extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); -extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); +extern void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg); extern struct irq_domain *hpet_create_irq_domain(int hpet_id); extern int hpet_assign_irq(struct irq_domain *domain, - struct hpet_dev *dev, int dev_num); + struct hpet_channel *hc, int dev_num); #ifdef CONFIG_HPET_EMULATE_RTC diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 32e666e1231e..cbd97e22d2f3 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -150,8 +150,11 @@ extern char irq_entries_start[]; #define trace_irq_entries_start irq_entries_start #endif +extern char spurious_entries_start[]; + #define VECTOR_UNUSED NULL -#define VECTOR_RETRIGGERED ((void *)~0UL) +#define VECTOR_SHUTDOWN ((void *)~0UL) +#define VECTOR_RETRIGGERED ((void *)~1UL) typedef struct irq_desc* vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cdf44aa9a501..af78cd72b8f3 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -401,6 +401,12 @@ enum HV_GENERIC_SET_FORMAT { #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* + * The Hyper-V TimeRefCount register and the TSC + * page provide a guest VM clock with 100ns tick rate + */ +#define HV_CLOCK_HZ (NSEC_PER_SEC/100) + typedef struct _HV_REFERENCE_TSC_PAGE { __u32 tsc_sequence; __u32 res1; diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index 8c5aaba6633f..50a30f6c668b 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -29,6 +29,7 @@ enum x86_hypervisor_type { X86_HYPER_XEN_HVM, 
X86_HYPER_KVM, X86_HYPER_JAILHOUSE, + X86_HYPER_ACRN, }; #ifdef CONFIG_HYPERVISOR_GUEST diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 310118805f57..0278aa66ef62 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -56,6 +56,7 @@ #define INTEL_FAM6_ICELAKE_XEON_D 0x6C #define INTEL_FAM6_ICELAKE_DESKTOP 0x7D #define INTEL_FAM6_ICELAKE_MOBILE 0x7E +#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* "Small Core" Processors (Atom) */ @@ -76,6 +77,7 @@ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ + #define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h index 8f3bee821e6c..187ce59aea28 100644 --- a/arch/x86/include/asm/irq_regs.h +++ b/arch/x86/include/asm/irq_regs.h @@ -16,7 +16,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs); static inline struct pt_regs *get_irq_regs(void) { - return this_cpu_read(irq_regs); + return __this_cpu_read(irq_regs); } static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) @@ -24,7 +24,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) struct pt_regs *old_regs; old_regs = get_irq_regs(); - this_cpu_write(irq_regs, new_regs); + __this_cpu_write(irq_regs, new_regs); return old_regs; } diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 65191ce8e1cf..06c3cc22a058 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H +#define HAVE_JUMP_LABEL_BATCH + #define JUMP_LABEL_NOP_SIZE 5 #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 003f2daa3b0f..5e7d6b46de97 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -71,22 +71,6 @@ struct kimage; #define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ /* - * CPU does not save ss and sp on stack if execution is already - * running in kernel mode at the time of NMI occurrence. This code - * fixes it. - */ -static inline void crash_fixup_ss_esp(struct pt_regs *newregs, - struct pt_regs *oldregs) -{ -#ifdef CONFIG_X86_32 - newregs->sp = (unsigned long)&(oldregs->sp); - asm volatile("xorl %%eax, %%eax\n\t" - "movw %%ss, %%ax\n\t" - :"=a"(newregs->ss)); -#endif -} - -/* * This function is responsible for capturing register states if coming * via panic otherwise just fix up the ss and sp if coming via kernel * mode exception. 
@@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, { if (oldregs) { memcpy(newregs, oldregs, sizeof(*newregs)); - crash_fixup_ss_esp(newregs, oldregs); } else { #ifdef CONFIG_X86_32 asm volatile("movl %%ebx,%0" : "=m"(newregs->bx)); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26d1eb83f72a..0cc5b611a113 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -686,6 +686,7 @@ struct kvm_vcpu_arch { u32 virtual_tsc_mult; u32 virtual_tsc_khz; s64 ia32_tsc_adjust_msr; + u64 msr_ia32_power_ctl; u64 tsc_scaling_ratio; atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ @@ -752,6 +753,8 @@ struct kvm_vcpu_arch { struct gfn_to_hva_cache data; } pv_eoi; + u64 msr_kvm_poll_control; + /* * Indicate whether the access faults on its page table in guest * which is set when fix page fault and used to detect unhandeable @@ -879,6 +882,7 @@ struct kvm_arch { bool mwait_in_guest; bool hlt_in_guest; bool pause_in_guest; + bool cstate_in_guest; unsigned long irq_sources_bitmap; s64 kvmclock_offset; @@ -926,6 +930,8 @@ struct kvm_arch { bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + + struct kvm_pmu_event_filter *pmu_event_filter; }; struct kvm_vm_stat { @@ -996,7 +1002,7 @@ struct kvm_x86_ops { int (*disabled_by_bios)(void); /* __init */ int (*hardware_enable)(void); void (*hardware_disable)(void); - void (*check_processor_compatibility)(void *rtn); + int (*check_processor_compatibility)(void);/* __init */ int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); @@ -1110,7 +1116,7 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); - void (*handle_external_intr)(struct kvm_vcpu *vcpu); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); bool (*mpx_supported)(void); bool (*xsaves_supported)(void); bool (*umip_emulated)(void); @@ -1529,7 +1535,6 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); -u64 kvm_get_arch_capabilities(void); void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 5ff3e8af2c20..e78c7db87801 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -59,6 +59,7 @@ typedef struct { #define INIT_MM_CONTEXT(mm) \ .context = { \ .ctx_id = 1, \ + .lock = __MUTEX_INITIALIZER(mm.context.lock), \ } void leave_mm(int cpu); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cc60e617931c..2ef31cc8c529 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -3,84 +3,15 @@ #define _ASM_X86_MSHYPER_H #include <linux/types.h> -#include <linux/atomic.h> #include <linux/nmi.h> #include <asm/io.h> #include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> -#define VP_INVAL U32_MAX - -struct ms_hyperv_info { - u32 features; - u32 misc_features; - u32 hints; - u32 nested_features; - u32 max_vp_index; - u32 max_lp_index; -}; - -extern struct ms_hyperv_info ms_hyperv; - - typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, void *data); -/* - * Generate the guest ID. 
- */ - -static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, - __u64 d_info2) -{ - __u64 guest_id = 0; - - guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48); - guest_id |= (d_info1 << 48); - guest_id |= (kernel_version << 16); - guest_id |= d_info2; - - return guest_id; -} - - -/* Free the message slot and signal end-of-message if required */ -static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) -{ - /* - * On crash we're reading some other CPU's message page and we need - * to be careful: this other CPU may already had cleared the header - * and the host may already had delivered some other message there. - * In case we blindly write msg->header.message_type we're going - * to lose it. We can still lose a message of the same type but - * we count on the fact that there can only be one - * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages - * on crash. - */ - if (cmpxchg(&msg->header.message_type, old_msg_type, - HVMSG_NONE) != old_msg_type) - return; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } -} - #define hv_init_timer(timer, tick) \ wrmsrl(HV_X64_MSR_STIMER0_COUNT + (2*timer), tick) #define hv_init_timer_config(timer, val) \ @@ -97,6 +28,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index) +#define hv_signal_eom() wrmsrl(HV_X64_MSR_EOM, 0) + #define hv_get_synint_state(int_num, val) \ rdmsrl(HV_X64_MSR_SINT0 + int_num, val) #define hv_set_synint_state(int_num, val) \ @@ -105,19 +38,23 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_crash_ctl(val) \ rdmsrl(HV_X64_MSR_CRASH_CTL, val) +#define hv_get_time_ref_count(val) \ + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, val) + +#define hv_get_reference_tsc(val) \ + rdmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_reference_tsc(val) \ + wrmsrl(HV_X64_MSR_REFERENCE_TSC, val) +#define hv_set_clocksource_vdso(val) \ + ((val).archdata.vclock_mode = VCLOCK_HVCLOCK) +#define hv_get_raw_timer() rdtsc_ordered() + void hyperv_callback_vector(void); void hyperv_reenlightenment_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector #endif void hyperv_vector_handler(struct pt_regs *regs); -void hv_setup_vmbus_irq(void (*handler)(void)); -void hv_remove_vmbus_irq(void); - -void hv_setup_kexec_handler(void (*handler)(void)); -void hv_remove_kexec_handler(void); -void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); -void hv_remove_crash_handler(void); /* * Routines for stimer0 Direct Mode handling. 
@@ -125,15 +62,12 @@ void hv_remove_crash_handler(void); */ void hv_stimer0_vector_handler(struct pt_regs *regs); void hv_stimer0_callback_vector(void); -int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)); -void hv_remove_stimer0_irq(int irq); static inline void hv_enable_stimer0_percpu_irq(int irq) {} static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) -extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; @@ -272,14 +206,6 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, return status; } -/* - * Hypervisor's notion of virtual processor ID is different from - * Linux' notion of CPU ID. This information can only be retrieved - * in the context of the calling CPU. Setup a map for easy access - * to this information. - */ -extern u32 *hv_vp_index; -extern u32 hv_max_vp_index; extern struct hv_vp_assist_page **hv_vp_assist_page; static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) @@ -290,63 +216,8 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) return hv_vp_assist_page[cpu]; } -/** - * hv_cpu_number_to_vp_number() - Map CPU to VP. - * @cpu_number: CPU number in Linux terms - * - * This function returns the mapping between the Linux processor - * number and the hypervisor's virtual processor number, useful - * in making hypercalls and such that talk about specific - * processors. - * - * Return: Virtual processor number in Hyper-V terms - */ -static inline int hv_cpu_number_to_vp_number(int cpu_number) -{ - return hv_vp_index[cpu_number]; -} - -static inline int cpumask_to_vpset(struct hv_vpset *vpset, - const struct cpumask *cpus) -{ - int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; - - /* valid_bank_mask can represent up to 64 banks */ - if (hv_max_vp_index / 64 >= 64) - return 0; - - /* - * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex - * structs are not cleared between calls, we risk flushing unneeded - * vCPUs otherwise. - */ - for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) - vpset->bank_contents[vcpu_bank] = 0; - - /* - * Some banks may end up being empty but this is acceptable. 
- */ - for_each_cpu(cpu, cpus) { - vcpu = hv_cpu_number_to_vp_number(cpu); - if (vcpu == VP_INVAL) - return -1; - vcpu_bank = vcpu / 64; - vcpu_offset = vcpu % 64; - __set_bit(vcpu_offset, (unsigned long *) - &vpset->bank_contents[vcpu_bank]); - if (vcpu_bank >= nr_bank) - nr_bank = vcpu_bank + 1; - } - vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); - return nr_bank; -} - void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void hyperv_report_panic(struct pt_regs *regs, long err); -void hyperv_report_panic_msg(phys_addr_t pa, size_t size); -bool hv_is_hyperv_initialized(void); -void hyperv_cleanup(void); void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); @@ -369,8 +240,6 @@ static inline void hv_apic_init(void) {} #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} -static inline bool hv_is_hyperv_initialized(void) { return false; } -static inline void hyperv_cleanup(void) {} static inline void hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} @@ -387,73 +256,7 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, } #endif /* CONFIG_HYPERV */ -#ifdef CONFIG_HYPERV_TSCPAGE -struct ms_hyperv_tsc_page *hv_get_tsc_page(void); -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - u64 scale, offset; - u32 sequence; - - /* - * The protocol for reading Hyper-V TSC page is specified in Hypervisor - * Top-Level Functional Specification ver. 3.0 and above. To get the - * reference time we must do the following: - * - READ ReferenceTscSequence - * A special '0' value indicates the time source is unreliable and we - * need to use something else. The currently published specification - * versions (up to 4.0b) contain a mistake and wrongly claim '-1' - * instead of '0' as the special value, see commit c35b82ef0294. - * - ReferenceTime = - * ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset - * - READ ReferenceTscSequence again. In case its value has changed - * since our first reading we need to discard ReferenceTime and repeat - * the whole sequence as the hypervisor was updating the page in - * between. - */ - do { - sequence = READ_ONCE(tsc_pg->tsc_sequence); - if (!sequence) - return U64_MAX; - /* - * Make sure we read sequence before we read other values from - * TSC page. - */ - smp_rmb(); - - scale = READ_ONCE(tsc_pg->tsc_scale); - offset = READ_ONCE(tsc_pg->tsc_offset); - *cur_tsc = rdtsc_ordered(); - - /* - * Make sure we read sequence after we read all other values - * from TSC page. 
- */ - smp_rmb(); - - } while (READ_ONCE(tsc_pg->tsc_sequence) != sequence); - - return mul_u64_u64_shr(*cur_tsc, scale, 64) + offset; -} - -static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg) -{ - u64 cur_tsc; - return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc); -} +#include <asm-generic/mshyperv.h> -#else -static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void) -{ - return NULL; -} - -static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, - u64 *cur_tsc) -{ - BUG(); - return U64_MAX; -} -#endif #endif diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 979ef971cc78..6b4fc2788078 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -61,6 +61,15 @@ #define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 #define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) +#define MSR_IA32_UMWAIT_CONTROL 0xe1 +#define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) +#define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) + #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index eb0f80ce8524..e28f8b723b5c 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -86,9 +86,9 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { - mds_idle_clear_cpu_buffers(); - trace_hardirqs_on(); + + mds_idle_clear_cpu_buffers(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index c2bf1de5d901..6fe76282aceb 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h @@ -9,12 +9,10 @@ struct olpc_platform_t { int flags; uint32_t boardrev; - int ecver; }; #define OLPC_F_PRESENT 0x01 #define OLPC_F_DCON 0x02 -#define OLPC_F_EC_WIDE_SCI 0x04 #ifdef CONFIG_OLPC @@ -64,13 +62,6 @@ static inline int olpc_board_at_least(uint32_t rev) return olpc_platform_info.boardrev >= rev; } -extern void olpc_ec_wakeup_set(u16 value); -extern void olpc_ec_wakeup_clear(u16 value); -extern bool olpc_ec_wakeup_available(void); - -extern int olpc_ec_mask_write(u16 bits); -extern int olpc_ec_sci_query(u16 *sci_value); - #else static inline int machine_is_olpc(void) @@ -83,14 +74,6 @@ static inline int olpc_has_dcon(void) return 0; } -static inline void olpc_ec_wakeup_set(u16 value) { } -static inline void olpc_ec_wakeup_clear(u16 value) { } - -static inline bool olpc_ec_wakeup_available(void) -{ - return false; -} - #endif #ifdef CONFIG_OLPC_XO1_PM @@ -101,20 +84,6 @@ extern void olpc_xo1_pm_wakeup_clear(u16 value); extern int pci_olpc_init(void); -/* SCI source values */ - -#define EC_SCI_SRC_EMPTY 0x00 -#define EC_SCI_SRC_GAME 0x01 -#define EC_SCI_SRC_BATTERY 0x02 -#define EC_SCI_SRC_BATSOC 0x04 -#define EC_SCI_SRC_BATERR 0x08 -#define EC_SCI_SRC_EBOOK 0x10 /* XO-1 only */ -#define EC_SCI_SRC_WLAN 0x20 /* XO-1 only */ -#define EC_SCI_SRC_ACPWR 0x40 -#define EC_SCI_SRC_BATCRIT 0x80 -#define EC_SCI_SRC_GPWAKE 0x100 /* XO-1.5 only */ -#define EC_SCI_SRC_ALL 0x1FF - /* GPIO assignments */ #define OLPC_GPIO_MIC_AC 1 diff --git a/arch/x86/include/asm/page_64_types.h 
b/arch/x86/include/asm/page_64_types.h index 793c14c372cb..288b065955b7 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -48,7 +48,7 @@ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ +/* See Documentation/x86/x86_64/mm.rst for a description of the memory map. */ #define __PHYSICAL_MASK_SHIFT 52 diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 2474e434a6f7..946f8f1f1efc 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -88,7 +88,7 @@ struct pv_init_ops { * the number of bytes of code generated, as we nop pad the * rest in generic code. */ - unsigned (*patch)(u8 type, void *insnbuf, + unsigned (*patch)(u8 type, void *insn_buff, unsigned long addr, unsigned len); } __no_randomize_layout; @@ -370,18 +370,11 @@ extern struct paravirt_patch_template pv_ops; /* Simple instruction patching code. */ #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" -#define DEF_NATIVE(ops, name, code) \ - __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \ - asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name)) +unsigned paravirt_patch_ident_64(void *insn_buff, unsigned len); +unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, unsigned len); +unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char *start, const char *end); -unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); -unsigned paravirt_patch_default(u8 type, void *insnbuf, - unsigned long addr, unsigned len); - -unsigned paravirt_patch_insns(void *insnbuf, unsigned len, - const char *start, const char *end); - -unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len); +unsigned native_patch(u8 type, void *insn_buff, unsigned long addr, unsigned len); int paravirt_disable_iospace(void); @@ -679,8 +672,8 @@ u64 _paravirt_ident_64(u64); /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch_site { - u8 *instr; /* original instructions */ - u8 instrtype; /* type of this instruction */ + u8 *instr; /* original instructions */ + u8 type; /* type of this instruction */ u8 len; /* length of original instruction */ }; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 1a19d11cfbbd..2278797c769d 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -87,7 +87,7 @@ * don't give an lvalue though). */ extern void __bad_percpu_size(void); -#define percpu_to_op(op, var, val) \ +#define percpu_to_op(qual, op, var, val) \ do { \ typedef typeof(var) pto_T__; \ if (0) { \ @@ -97,22 +97,22 @@ do { \ } \ switch (sizeof(var)) { \ case 1: \ - asm(op "b %1,"__percpu_arg(0) \ + asm qual (op "b %1,"__percpu_arg(0) \ : "+m" (var) \ : "qi" ((pto_T__)(val))); \ break; \ case 2: \ - asm(op "w %1,"__percpu_arg(0) \ + asm qual (op "w %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((pto_T__)(val))); \ break; \ case 4: \ - asm(op "l %1,"__percpu_arg(0) \ + asm qual (op "l %1,"__percpu_arg(0) \ : "+m" (var) \ : "ri" ((pto_T__)(val))); \ break; \ case 8: \ - asm(op "q %1,"__percpu_arg(0) \ + asm qual (op "q %1,"__percpu_arg(0) \ : "+m" (var) \ : "re" ((pto_T__)(val))); \ break; \ @@ -124,7 +124,7 @@ do { \ * Generate a percpu add to memory instruction and optimize code * if one is added or subtracted. 
*/ -#define percpu_add_op(var, val) \ +#define percpu_add_op(qual, var, val) \ do { \ typedef typeof(var) pao_T__; \ const int pao_ID__ = (__builtin_constant_p(val) && \ @@ -138,41 +138,41 @@ do { \ switch (sizeof(var)) { \ case 1: \ if (pao_ID__ == 1) \ - asm("incb "__percpu_arg(0) : "+m" (var)); \ + asm qual ("incb "__percpu_arg(0) : "+m" (var)); \ else if (pao_ID__ == -1) \ - asm("decb "__percpu_arg(0) : "+m" (var)); \ + asm qual ("decb "__percpu_arg(0) : "+m" (var)); \ else \ - asm("addb %1, "__percpu_arg(0) \ + asm qual ("addb %1, "__percpu_arg(0) \ : "+m" (var) \ : "qi" ((pao_T__)(val))); \ break; \ case 2: \ if (pao_ID__ == 1) \ - asm("incw "__percpu_arg(0) : "+m" (var)); \ + asm qual ("incw "__percpu_arg(0) : "+m" (var)); \ else if (pao_ID__ == -1) \ - asm("decw "__percpu_arg(0) : "+m" (var)); \ + asm qual ("decw "__percpu_arg(0) : "+m" (var)); \ else \ - asm("addw %1, "__percpu_arg(0) \ + asm qual ("addw %1, "__percpu_arg(0) \ : "+m" (var) \ : "ri" ((pao_T__)(val))); \ break; \ case 4: \ if (pao_ID__ == 1) \ - asm("incl "__percpu_arg(0) : "+m" (var)); \ + asm qual ("incl "__percpu_arg(0) : "+m" (var)); \ else if (pao_ID__ == -1) \ - asm("decl "__percpu_arg(0) : "+m" (var)); \ + asm qual ("decl "__percpu_arg(0) : "+m" (var)); \ else \ - asm("addl %1, "__percpu_arg(0) \ + asm qual ("addl %1, "__percpu_arg(0) \ : "+m" (var) \ : "ri" ((pao_T__)(val))); \ break; \ case 8: \ if (pao_ID__ == 1) \ - asm("incq "__percpu_arg(0) : "+m" (var)); \ + asm qual ("incq "__percpu_arg(0) : "+m" (var)); \ else if (pao_ID__ == -1) \ - asm("decq "__percpu_arg(0) : "+m" (var)); \ + asm qual ("decq "__percpu_arg(0) : "+m" (var)); \ else \ - asm("addq %1, "__percpu_arg(0) \ + asm qual ("addq %1, "__percpu_arg(0) \ : "+m" (var) \ : "re" ((pao_T__)(val))); \ break; \ @@ -180,27 +180,27 @@ do { \ } \ } while (0) -#define percpu_from_op(op, var) \ +#define percpu_from_op(qual, op, var) \ ({ \ typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ - asm volatile(op "b "__percpu_arg(1)",%0"\ + asm qual (op "b "__percpu_arg(1)",%0" \ : "=q" (pfo_ret__) \ : "m" (var)); \ break; \ case 2: \ - asm volatile(op "w "__percpu_arg(1)",%0"\ + asm qual (op "w "__percpu_arg(1)",%0" \ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 4: \ - asm volatile(op "l "__percpu_arg(1)",%0"\ + asm qual (op "l "__percpu_arg(1)",%0" \ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ case 8: \ - asm volatile(op "q "__percpu_arg(1)",%0"\ + asm qual (op "q "__percpu_arg(1)",%0" \ : "=r" (pfo_ret__) \ : "m" (var)); \ break; \ @@ -238,23 +238,23 @@ do { \ pfo_ret__; \ }) -#define percpu_unary_op(op, var) \ +#define percpu_unary_op(qual, op, var) \ ({ \ switch (sizeof(var)) { \ case 1: \ - asm(op "b "__percpu_arg(0) \ + asm qual (op "b "__percpu_arg(0) \ : "+m" (var)); \ break; \ case 2: \ - asm(op "w "__percpu_arg(0) \ + asm qual (op "w "__percpu_arg(0) \ : "+m" (var)); \ break; \ case 4: \ - asm(op "l "__percpu_arg(0) \ + asm qual (op "l "__percpu_arg(0) \ : "+m" (var)); \ break; \ case 8: \ - asm(op "q "__percpu_arg(0) \ + asm qual (op "q "__percpu_arg(0) \ : "+m" (var)); \ break; \ default: __bad_percpu_size(); \ @@ -264,27 +264,27 @@ do { \ /* * Add return operation */ -#define percpu_add_return_op(var, val) \ +#define percpu_add_return_op(qual, var, val) \ ({ \ typeof(var) paro_ret__ = val; \ switch (sizeof(var)) { \ case 1: \ - asm("xaddb %0, "__percpu_arg(1) \ + asm qual ("xaddb %0, "__percpu_arg(1) \ : "+q" (paro_ret__), "+m" (var) \ : : "memory"); \ break; \ case 2: \ - asm("xaddw %0, "__percpu_arg(1) \ + asm qual ("xaddw %0, 
"__percpu_arg(1) \ : "+r" (paro_ret__), "+m" (var) \ : : "memory"); \ break; \ case 4: \ - asm("xaddl %0, "__percpu_arg(1) \ + asm qual ("xaddl %0, "__percpu_arg(1) \ : "+r" (paro_ret__), "+m" (var) \ : : "memory"); \ break; \ case 8: \ - asm("xaddq %0, "__percpu_arg(1) \ + asm qual ("xaddq %0, "__percpu_arg(1) \ : "+re" (paro_ret__), "+m" (var) \ : : "memory"); \ break; \ @@ -299,13 +299,13 @@ do { \ * expensive due to the implied lock prefix. The processor cannot prefetch * cachelines if xchg is used. */ -#define percpu_xchg_op(var, nval) \ +#define percpu_xchg_op(qual, var, nval) \ ({ \ typeof(var) pxo_ret__; \ typeof(var) pxo_new__ = (nval); \ switch (sizeof(var)) { \ case 1: \ - asm("\n\tmov "__percpu_arg(1)",%%al" \ + asm qual ("\n\tmov "__percpu_arg(1)",%%al" \ "\n1:\tcmpxchgb %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ @@ -313,7 +313,7 @@ do { \ : "memory"); \ break; \ case 2: \ - asm("\n\tmov "__percpu_arg(1)",%%ax" \ + asm qual ("\n\tmov "__percpu_arg(1)",%%ax" \ "\n1:\tcmpxchgw %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ @@ -321,7 +321,7 @@ do { \ : "memory"); \ break; \ case 4: \ - asm("\n\tmov "__percpu_arg(1)",%%eax" \ + asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \ "\n1:\tcmpxchgl %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ @@ -329,7 +329,7 @@ do { \ : "memory"); \ break; \ case 8: \ - asm("\n\tmov "__percpu_arg(1)",%%rax" \ + asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \ "\n1:\tcmpxchgq %2, "__percpu_arg(1) \ "\n\tjnz 1b" \ : "=&a" (pxo_ret__), "+m" (var) \ @@ -345,32 +345,32 @@ do { \ * cmpxchg has no such implied lock semantics as a result it is much * more efficient for cpu local operations. */ -#define percpu_cmpxchg_op(var, oval, nval) \ +#define percpu_cmpxchg_op(qual, var, oval, nval) \ ({ \ typeof(var) pco_ret__; \ typeof(var) pco_old__ = (oval); \ typeof(var) pco_new__ = (nval); \ switch (sizeof(var)) { \ case 1: \ - asm("cmpxchgb %2, "__percpu_arg(1) \ + asm qual ("cmpxchgb %2, "__percpu_arg(1) \ : "=a" (pco_ret__), "+m" (var) \ : "q" (pco_new__), "0" (pco_old__) \ : "memory"); \ break; \ case 2: \ - asm("cmpxchgw %2, "__percpu_arg(1) \ + asm qual ("cmpxchgw %2, "__percpu_arg(1) \ : "=a" (pco_ret__), "+m" (var) \ : "r" (pco_new__), "0" (pco_old__) \ : "memory"); \ break; \ case 4: \ - asm("cmpxchgl %2, "__percpu_arg(1) \ + asm qual ("cmpxchgl %2, "__percpu_arg(1) \ : "=a" (pco_ret__), "+m" (var) \ : "r" (pco_new__), "0" (pco_old__) \ : "memory"); \ break; \ case 8: \ - asm("cmpxchgq %2, "__percpu_arg(1) \ + asm qual ("cmpxchgq %2, "__percpu_arg(1) \ : "=a" (pco_ret__), "+m" (var) \ : "r" (pco_new__), "0" (pco_old__) \ : "memory"); \ @@ -391,58 +391,70 @@ do { \ */ #define this_cpu_read_stable(var) percpu_stable_op("mov", var) -#define raw_cpu_read_1(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_read_2(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_read_4(pcp) percpu_from_op("mov", pcp) - -#define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_or_1(pcp, 
val) percpu_to_op("or", (pcp), val) -#define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) -#define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) -#define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) - -#define this_cpu_read_1(pcp) percpu_from_op("mov", pcp) -#define this_cpu_read_2(pcp) percpu_from_op("mov", pcp) -#define this_cpu_read_4(pcp) percpu_from_op("mov", pcp) -#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) - -#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) -#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_read_1(pcp) percpu_from_op(, "mov", pcp) +#define raw_cpu_read_2(pcp) percpu_from_op(, "mov", pcp) +#define raw_cpu_read_4(pcp) percpu_from_op(, "mov", pcp) + +#define raw_cpu_write_1(pcp, val) percpu_to_op(, "mov", (pcp), val) +#define raw_cpu_write_2(pcp, val) percpu_to_op(, "mov", (pcp), val) +#define raw_cpu_write_4(pcp, val) percpu_to_op(, "mov", (pcp), val) +#define raw_cpu_add_1(pcp, val) percpu_add_op(, (pcp), val) +#define raw_cpu_add_2(pcp, val) percpu_add_op(, (pcp), val) +#define raw_cpu_add_4(pcp, val) percpu_add_op(, (pcp), val) +#define raw_cpu_and_1(pcp, val) percpu_to_op(, "and", (pcp), val) +#define raw_cpu_and_2(pcp, val) percpu_to_op(, "and", (pcp), val) +#define raw_cpu_and_4(pcp, val) percpu_to_op(, "and", (pcp), val) +#define raw_cpu_or_1(pcp, val) percpu_to_op(, "or", (pcp), val) +#define raw_cpu_or_2(pcp, val) percpu_to_op(, "or", (pcp), val) +#define raw_cpu_or_4(pcp, val) percpu_to_op(, "or", (pcp), val) + +/* + * raw_cpu_xchg() can use a load-store since it is not required to be + * IRQ-safe. 
+ */ +#define raw_percpu_xchg_op(var, nval) \ +({ \ + typeof(var) pxo_ret__ = raw_cpu_read(var); \ + raw_cpu_write(var, (nval)); \ + pxo_ret__; \ +}) + +#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val) +#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val) + +#define this_cpu_read_1(pcp) percpu_from_op(volatile, "mov", pcp) +#define this_cpu_read_2(pcp) percpu_from_op(volatile, "mov", pcp) +#define this_cpu_read_4(pcp) percpu_from_op(volatile, "mov", pcp) +#define this_cpu_write_1(pcp, val) percpu_to_op(volatile, "mov", (pcp), val) +#define this_cpu_write_2(pcp, val) percpu_to_op(volatile, "mov", (pcp), val) +#define this_cpu_write_4(pcp, val) percpu_to_op(volatile, "mov", (pcp), val) +#define this_cpu_add_1(pcp, val) percpu_add_op(volatile, (pcp), val) +#define this_cpu_add_2(pcp, val) percpu_add_op(volatile, (pcp), val) +#define this_cpu_add_4(pcp, val) percpu_add_op(volatile, (pcp), val) +#define this_cpu_and_1(pcp, val) percpu_to_op(volatile, "and", (pcp), val) +#define this_cpu_and_2(pcp, val) percpu_to_op(volatile, "and", (pcp), val) +#define this_cpu_and_4(pcp, val) percpu_to_op(volatile, "and", (pcp), val) +#define this_cpu_or_1(pcp, val) percpu_to_op(volatile, "or", (pcp), val) +#define this_cpu_or_2(pcp, val) percpu_to_op(volatile, "or", (pcp), val) +#define this_cpu_or_4(pcp, val) percpu_to_op(volatile, "or", (pcp), val) +#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(volatile, pcp, nval) +#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval) +#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval) + +#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val) +#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val) +#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val) +#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval) +#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval) +#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval) + +#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(volatile, pcp, val) +#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(volatile, pcp, val) +#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(volatile, pcp, val) +#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval) +#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval) #ifdef CONFIG_X86_CMPXCHG64 #define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \ @@ -466,23 +478,23 @@ do { \ * 32 bit must fall back to generic operations. 
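[Editorial aside — not part of the diff above] A minimal sketch, assuming a hypothetical per-CPU counter named demo_counter, of how the two flavours are meant to be used after the qual change: this_cpu_*() keeps the volatile asm qualifier and stays safe from any context on x86, while raw_cpu_*() drops it, so the compiler may merge or reorder those accesses and the caller must already have preemption disabled.

#include <linux/percpu.h>
#include <linux/preempt.h>

/* Hypothetical per-CPU counter, for illustration only. */
static DEFINE_PER_CPU(unsigned long, demo_counter);

static void demo_count_from_any_context(void)
{
	/* this_cpu_*: volatile asm, a single insn on x86, IRQ-safe */
	this_cpu_inc(demo_counter);
}

static void demo_count_preempt_off(void)
{
	preempt_disable();
	/* raw_cpu_*: no volatile; the compiler may combine these */
	raw_cpu_inc(demo_counter);
	raw_cpu_inc(demo_counter);
	preempt_enable();
}
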
*/ #ifdef CONFIG_X86_64 -#define raw_cpu_read_8(pcp) percpu_from_op("mov", pcp) -#define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) - -#define this_cpu_read_8(pcp) percpu_from_op("mov", pcp) -#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) -#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) -#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) -#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) -#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) -#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) -#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) +#define raw_cpu_read_8(pcp) percpu_from_op(, "mov", pcp) +#define raw_cpu_write_8(pcp, val) percpu_to_op(, "mov", (pcp), val) +#define raw_cpu_add_8(pcp, val) percpu_add_op(, (pcp), val) +#define raw_cpu_and_8(pcp, val) percpu_to_op(, "and", (pcp), val) +#define raw_cpu_or_8(pcp, val) percpu_to_op(, "or", (pcp), val) +#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(, pcp, val) +#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval) +#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval) + +#define this_cpu_read_8(pcp) percpu_from_op(volatile, "mov", pcp) +#define this_cpu_write_8(pcp, val) percpu_to_op(volatile, "mov", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_add_op(volatile, (pcp), val) +#define this_cpu_and_8(pcp, val) percpu_to_op(volatile, "and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_to_op(volatile, "or", (pcp), val) +#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(volatile, pcp, val) +#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(volatile, pcp, nval) +#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval) /* * Pretty complex macro to generate cmpxchg16 instruction. The instruction diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index a281e61ec60c..29aa7859bdee 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -6,6 +6,9 @@ #include <linux/mm.h> /* for struct page */ #include <linux/pagemap.h> +#define __HAVE_ARCH_PTE_ALLOC_ONE +#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */ + static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; } #ifdef CONFIG_PARAVIRT_XXL @@ -47,24 +50,8 @@ extern gfp_t __userpte_alloc_gfp; extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); -extern pte_t *pte_alloc_one_kernel(struct mm_struct *); extern pgtable_t pte_alloc_one(struct mm_struct *); -/* Should really implement gc for free page table pages. This could be - done with a reference count in struct page. 
*/ - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - BUG_ON((unsigned long)pte & (PAGE_SIZE-1)); - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte); static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index f8b1ad2c3828..e3633795fb22 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -285,53 +285,6 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp) #define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \ __pteval_swp_offset(pte))) -#define gup_get_pte gup_get_pte -/* - * WARNING: only to be used in the get_user_pages_fast() implementation. - * - * With get_user_pages_fast(), we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atomically, - * but that is not possible (without expensive cmpxchg8b) on PAE. What - * we do have is the guarantee that a PTE will only either go from not - * present to present, or present to not present or both -- it will not - * switch to a completely different present page without a TLB flush in - * between; something that we are blocking by holding interrupts off. - * - * Setting ptes from not present to present goes: - * - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees 'l' iff pte_high - * sees 'h'. We load pte_high *after* loading pte_low, which ensures we - * don't see an older value of pte_high. *Then* we recheck pte_low, - * which ensures that we haven't picked up a changed pte high. We might - * have gotten rubbish values from pte_low and pte_high, but we are - * guaranteed that pte_low will not have the present bit set *unless* - * it is 'l'. Because get_user_pages_fast() only operates on present ptes - * we're safe. - */ -static inline pte_t gup_get_pte(pte_t *ptep) -{ - pte_t pte; - - do { - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - } while (unlikely(pte.pte_low != ptep->pte_low)); - - return pte; -} - #include <asm/pgtable-invert.h> #endif /* _ASM_X86_PGTABLE_3LEVEL_H */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 4fe9e7fc74d3..c78da8eda8f2 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -106,6 +106,6 @@ do { \ * with only a host target support using a 32-bit type for internal * representation. 
*/ -#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) +#define LOWMEM_PAGES ((((_ULL(2)<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 0bb566315621..4990d26dfc73 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -259,14 +259,8 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #define gup_fast_permitted gup_fast_permitted -static inline bool gup_fast_permitted(unsigned long start, int nr_pages) +static inline bool gup_fast_permitted(unsigned long start, unsigned long end) { - unsigned long len, end; - - len = (unsigned long)nr_pages << PAGE_SHIFT; - end = start + len; - if (end < start) - return false; if (end >> __VIRTUAL_MASK_SHIFT) return false; return true; diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 88bca456da99..52e5f5f2240d 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -103,7 +103,7 @@ extern unsigned int ptrs_per_p4d; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* - * See Documentation/x86/x86_64/mm.txt for a description of the memory map. + * See Documentation/x86/x86_64/mm.rst for a description of the memory map. * * Be very careful vs. KASLR when changing anything here. The KASLR address * range must not overlap with anything except the KASAN shadow area, which diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c34a35c78618..6e0a3b43d027 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -105,7 +105,7 @@ struct cpuinfo_x86 { int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ - u16 x86_max_cores; + u16 x86_max_cores; u16 apicid; u16 initial_apicid; u16 x86_clflush_size; @@ -117,6 +117,8 @@ struct cpuinfo_x86 { u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; + u16 cpu_die_id; + u16 logical_die_id; /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; @@ -144,7 +146,8 @@ enum cpuid_regs_idx { #define X86_VENDOR_TRANSMETA 7 #define X86_VENDOR_NSC 8 #define X86_VENDOR_HYGON 9 -#define X86_VENDOR_NUM 10 +#define X86_VENDOR_ZHAOXIN 10 +#define X86_VENDOR_NUM 11 #define X86_VENDOR_UNKNOWN 0xff @@ -738,6 +741,7 @@ extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void load_percpu_segment(int); extern void cpu_init(void); +extern void cr4_init(void); static inline unsigned long get_debugctlmsr(void) { diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 8a7fc0cca2d1..332eb3525867 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -98,12 +98,10 @@ struct cpuinfo_x86; struct task_struct; extern unsigned long profile_pc(struct pt_regs *regs); -#define profile_pc profile_pc extern unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs); -extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code, int si_code); +extern void send_sigtrap(struct pt_regs *regs, int error_code, int si_code); static inline unsigned long regs_return_value(struct pt_regs *regs) @@ -166,20 +164,37 @@ static inline bool user_64bit_mode(struct pt_regs *regs) #define compat_user_stack_pointer() current_pt_regs()->sp #endif -#ifdef CONFIG_X86_32 -extern unsigned long kernel_stack_pointer(struct 
pt_regs *regs); -#else static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { return regs->sp; } -#endif -#define GET_IP(regs) ((regs)->ip) -#define GET_FP(regs) ((regs)->bp) -#define GET_USP(regs) ((regs)->sp) +static inline unsigned long instruction_pointer(struct pt_regs *regs) +{ + return regs->ip; +} + +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->ip = val; +} + +static inline unsigned long frame_pointer(struct pt_regs *regs) +{ + return regs->bp; +} + +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} -#include <asm-generic/ptrace.h> +static inline void user_stack_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + regs->sp = val; +} /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); @@ -201,14 +216,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, if (unlikely(offset > MAX_REG_OFFSET)) return 0; #ifdef CONFIG_X86_32 - /* - * Traps from the kernel do not save sp and ss. - * Use the helper function to retrieve sp. - */ - if (offset == offsetof(struct pt_regs, sp) && - regs->cs == __KERNEL_CS) - return kernel_stack_pointer(regs); - /* The selector fields are 16-bit. */ if (offset == offsetof(struct pt_regs, cs) || offset == offsetof(struct pt_regs, ss) || @@ -234,8 +241,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, static inline int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { - return ((addr & ~(THREAD_SIZE - 1)) == - (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); + return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1))); } /** @@ -249,7 +255,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, */ static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) { - unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + unsigned long *addr = (unsigned long *)regs->sp; addr += n; if (regs_within_kernel_stack(regs, (unsigned long)addr)) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index b6033680d458..19b695ff2c68 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_PVCLOCK_H #define _ASM_X86_PVCLOCK_H -#include <linux/clocksource.h> +#include <asm/clocksource.h> #include <asm/pvclock-abi.h> /* some helper functions for xen and kvm pv clock sources */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 8ea1cfdbeabc..71b32f2570ab 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -13,4 +13,6 @@ extern char __end_rodata_aligned[]; extern char __end_rodata_hpage_align[]; #endif +extern char __end_of_kernel_reserve[]; + #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index da545df207b2..e1356a3b8223 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -23,6 +23,7 @@ extern unsigned int num_processors; DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); @@ -162,7 +163,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r); * from the 
initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -#define raw_smp_processor_id() (this_cpu_read(cpu_number)) +#define raw_smp_processor_id() this_cpu_read(cpu_number) +#define __smp_processor_id() __this_cpu_read(cpu_number) #ifdef CONFIG_X86_32 extern int safe_smp_processor_id(void); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 0a3c4cab39db..219be88a59d2 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -6,6 +6,8 @@ #ifdef __KERNEL__ #include <asm/nops.h> +#include <asm/processor-flags.h> +#include <linux/jump_label.h> /* * Volatile isn't enough to prevent the compiler from reordering the @@ -16,6 +18,8 @@ */ extern unsigned long __force_order; +void native_write_cr0(unsigned long val); + static inline unsigned long native_read_cr0(void) { unsigned long val; @@ -23,11 +27,6 @@ static inline unsigned long native_read_cr0(void) return val; } -static inline void native_write_cr0(unsigned long val) -{ - asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); -} - static inline unsigned long native_read_cr2(void) { unsigned long val; @@ -72,10 +71,7 @@ static inline unsigned long native_read_cr4(void) return val; } -static inline void native_write_cr4(unsigned long val) -{ - asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); -} +void native_write_cr4(unsigned long val); #ifdef CONFIG_X86_64 static inline unsigned long native_read_cr8(void) diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index a8d0cdf48616..14db05086bbf 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -78,7 +78,7 @@ static inline unsigned long * get_stack_pointer(struct task_struct *task, struct pt_regs *regs) { if (regs) - return (unsigned long *)kernel_stack_pointer(regs); + return (unsigned long *)regs->sp; if (task == current) return __builtin_frame_address(0); diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 880b5515b1d6..70c09967a999 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -18,6 +18,20 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif +/* + * Currently, the max observed size in the kernel code is + * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5. + * Raise it if needed. 
+ */ +#define POKE_MAX_OPCODE_SIZE 5 + +struct text_poke_loc { + void *detour; + void *addr; + size_t len; + const char opcode[POKE_MAX_OPCODE_SIZE]; +}; + extern void text_poke_early(void *addr, const void *opcode, size_t len); /* @@ -38,6 +52,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries); extern int after_bootmem; extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; @@ -51,7 +66,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) #define INT3_INSN_SIZE 1 #define CALL_INSN_SIZE 5 -#ifdef CONFIG_X86_64 static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) { /* @@ -69,7 +83,6 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); int3_emulate_jmp(regs, func); } -#endif /* CONFIG_X86_64 */ #endif /* !CONFIG_UML_X86 */ #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index cef818b16045..8ac563abb567 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -7,6 +7,7 @@ extern void hpet_time_init(void); extern void time_init(void); +extern bool pit_timer_init(void); extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 453cf38a1c33..4b14d2318251 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -106,15 +106,25 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); #define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) +#define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id) +#define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) #ifdef CONFIG_SMP +#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) +extern unsigned int __max_die_per_package; + +static inline int topology_max_die_per_package(void) +{ + return __max_die_per_package; +} + extern int __max_smt_threads; static inline int topology_max_smt_threads(void) @@ -123,14 +133,21 @@ static inline int topology_max_smt_threads(void) } int topology_update_package_map(unsigned int apicid, unsigned int cpu); +int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); +int topology_phys_to_logical_die(unsigned int die, unsigned int cpu); bool topology_is_primary_thread(unsigned int cpu); bool topology_smt_supported(void); #else #define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } +static inline int +topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int 
topology_phys_to_logical_die(unsigned int die, + unsigned int cpu) { return 0; } +static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } static inline bool topology_smt_supported(void) { return false; } diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 146859efd83c..097589753fec 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -54,5 +54,6 @@ # define __ARCH_WANT_SYS_FORK # define __ARCH_WANT_SYS_VFORK # define __ARCH_WANT_SYS_CLONE +# define __ARCH_WANT_SYS_CLONE3 #endif /* _ASM_X86_UNISTD_H */ diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..ae91429129a6 --- /dev/null +++ b/arch/x86/include/asm/vdso/gettimeofday.h @@ -0,0 +1,261 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * Copyright (C) 2019 ARM Limited. + * Copyright 2006 Andi Kleen, SUSE Labs. + * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> + * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <uapi/linux/time.h> +#include <asm/vgtod.h> +#include <asm/vvar.h> +#include <asm/unistd.h> +#include <asm/msr.h> +#include <asm/pvclock.h> +#include <clocksource/hyperv_timer.h> + +#define __vdso_data (VVAR(_vdso_data)) + +#define VDSO_HAS_TIME 1 + +#define VDSO_HAS_CLOCK_GETRES 1 + +/* + * Declare the memory-mapped vclock data pages. These come from hypervisors. + * If we ever reintroduce something like direct access to an MMIO clock like + * the HPET again, it will go here as well. + * + * A load from any of these pages will segfault if the clock in question is + * disabled, so appropriate compiler barriers and checks need to be used + * to prevent stray loads. + * + * These declarations MUST NOT be const. The compiler will assume that + * an extern const variable has genuinely constant contents, and the + * resulting code won't work, since the whole point is that these pages + * change over time, possibly while we're accessing them. + */ + +#ifdef CONFIG_PARAVIRT_CLOCK +/* + * This is the vCPU 0 pvclock page. We only use pvclock from the vDSO + * if the hypervisor tells us that all vCPUs can get valid data from the + * vCPU 0 page. 
+ */ +extern struct pvclock_vsyscall_time_info pvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +extern struct ms_hyperv_tsc_page hvclock_page + __attribute__((visibility("hidden"))); +#endif + +#ifndef BUILD_VDSO32 + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_gettime), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm("syscall" : "=a" (ret) : + "0" (__NR_gettimeofday), "D" (_tv), "S" (_tz) : "memory"); + + return ret; +} + +static __always_inline +long clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ("syscall" : "=a" (ret), "=m" (*_ts) : + "0" (__NR_clock_getres), "D" (_clkid), "S" (_ts) : + "rcx", "r11"); + + return ret; +} + +#else + +static __always_inline +long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_gettime64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +static __always_inline +long gettimeofday_fallback(struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + long ret; + + asm( + "mov %%ebx, %%edx \n" + "mov %2, %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret) + : "0" (__NR_gettimeofday), "g" (_tv), "c" (_tz) + : "memory", "edx"); + + return ret; +} + +static __always_inline long +clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts) +{ + long ret; + + asm ( + "mov %%ebx, %%edx \n" + "mov %[clock], %%ebx \n" + "call __kernel_vsyscall \n" + "mov %%edx, %%ebx \n" + : "=a" (ret), "=m" (*_ts) + : "0" (__NR_clock_getres_time64), [clock] "g" (_clkid), "c" (_ts) + : "edx"); + + return ret; +} + +#endif + +#ifdef CONFIG_PARAVIRT_CLOCK +static u64 vread_pvclock(void) +{ + const struct pvclock_vcpu_time_info *pvti = &pvclock_page.pvti; + u32 version; + u64 ret; + + /* + * Note: The kernel and hypervisor must guarantee that cpu ID + * number maps 1:1 to per-CPU pvclock time info. + * + * Because the hypervisor is entirely unaware of guest userspace + * preemption, it cannot guarantee that per-CPU pvclock time + * info is updated if the underlying CPU changes or that that + * version is increased whenever underlying CPU changes. + * + * On KVM, we are guaranteed that pvti updates for any vCPU are + * atomic as seen by *all* vCPUs. This is an even stronger + * guarantee than we get with a normal seqlock. + * + * On Xen, we don't appear to have that guarantee, but Xen still + * supplies a valid seqlock using the version field. + * + * We only do pvclock vdso timing at all if + * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to + * mean that all vCPUs have matching pvti and that the TSC is + * synced, so we can just look at vCPU 0's pvti. 
+ */ + + do { + version = pvclock_read_begin(pvti); + + if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) + return U64_MAX; + + ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); + } while (pvclock_read_retry(pvti, version)); + + return ret; +} +#endif + +#ifdef CONFIG_HYPERV_TSCPAGE +static u64 vread_hvclock(void) +{ + return hv_read_tsc_page(&hvclock_page); +} +#endif + +static inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + if (clock_mode == VCLOCK_TSC) + return (u64)rdtsc_ordered(); + /* + * For any memory-mapped vclock type, we need to make sure that gcc + * doesn't cleverly hoist a load before the mode check. Otherwise we + * might end up touching the memory-mapped page even if the vclock in + * question isn't enabled, which will segfault. Hence the barriers. + */ +#ifdef CONFIG_PARAVIRT_CLOCK + if (clock_mode == VCLOCK_PVCLOCK) { + barrier(); + return vread_pvclock(); + } +#endif +#ifdef CONFIG_HYPERV_TSCPAGE + if (clock_mode == VCLOCK_HVCLOCK) { + barrier(); + return vread_hvclock(); + } +#endif + return U64_MAX; +} + +static __always_inline const struct vdso_data *__arch_get_vdso_data(void) +{ + return __vdso_data; +} + +/* + * x86 specific delta calculation. + * + * The regular implementation assumes that clocksource reads are globally + * monotonic. The TSC can be slightly off across sockets which can cause + * the regular delta calculation (@cycles - @last) to return a huge time + * jump. + * + * Therefore it needs to be verified that @cycles are greater than + * @last. If not then use @last, which is the base time of the current + * conversion period. + * + * This variant also removes the masking of the subtraction because the + * clocksource mask of all VDSO capable clocksources on x86 is U64_MAX + * which would result in a pointless operation. The compiler cannot + * optimize it away as the mask comes from the vdso data and is not compile + * time constant. + */ +static __always_inline +u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult) +{ + if (cycles > last) + return (cycles - last) * mult; + return 0; +} +#define vdso_calc_delta vdso_calc_delta + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..0026ab2123ce --- /dev/null +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/hrtimer.h> +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> +#include <asm/vgtod.h> +#include <asm/vvar.h> + +int vclocks_used __read_mostly; + +DEFINE_VVAR(struct vdso_data, _vdso_data); +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. + */ +static __always_inline +struct vdso_data *__x86_get_k_vdso_data(void) +{ + return _vdso_data; +} +#define __arch_get_k_vdso_data __x86_get_k_vdso_data + +static __always_inline +int __x86_get_clock_mode(struct timekeeper *tk) +{ + int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + + /* Mark the new vclock used. 
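[Editorial aside — not part of the diff above] A tiny standalone demonstration, with made-up values, of the wraparound that the cycles > last guard in vdso_calc_delta() avoids: a reader whose TSC is a hair behind the cycle_last captured on another socket would otherwise compute an unsigned delta of nearly 2^64 and time would jump forward.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t last = 1000, cycles = 998, mult = 1;

	/* generic path: (cycles - last) wraps to 2^64 - 2 */
	printf("unguarded delta: %llu\n",
	       (unsigned long long)((cycles - last) * mult));

	/* x86 variant: clamp to zero when cycles <= last */
	printf("clamped delta:   %llu\n",
	       (unsigned long long)(cycles > last ? (cycles - last) * mult : 0));
	return 0;
}
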
*/ + BUILD_BUG_ON(VCLOCK_MAX >= 32); + WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode)); + + return vclock_mode; +} +#define __arch_get_clock_mode __x86_get_clock_mode + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 913a133f8e6f..a2638c6124ed 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -3,7 +3,9 @@ #define _ASM_X86_VGTOD_H #include <linux/compiler.h> -#include <linux/clocksource.h> +#include <asm/clocksource.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> #include <uapi/linux/time.h> @@ -13,81 +15,10 @@ typedef u64 gtod_long_t; typedef unsigned long gtod_long_t; #endif -/* - * There is one of these objects in the vvar page for each - * vDSO-accelerated clockid. For high-resolution clocks, this encodes - * the time corresponding to vsyscall_gtod_data.cycle_last. For coarse - * clocks, this encodes the actual time. - * - * To confuse the reader, for high-resolution clocks, nsec is left-shifted - * by vsyscall_gtod_data.shift. - */ -struct vgtod_ts { - u64 sec; - u64 nsec; -}; - -#define VGTOD_BASES (CLOCK_TAI + 1) -#define VGTOD_HRES (BIT(CLOCK_REALTIME) | BIT(CLOCK_MONOTONIC) | BIT(CLOCK_TAI)) -#define VGTOD_COARSE (BIT(CLOCK_REALTIME_COARSE) | BIT(CLOCK_MONOTONIC_COARSE)) - -/* - * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time - * so be carefull by modifying this structure. - */ -struct vsyscall_gtod_data { - unsigned int seq; - - int vclock_mode; - u64 cycle_last; - u64 mask; - u32 mult; - u32 shift; - - struct vgtod_ts basetime[VGTOD_BASES]; - - int tz_minuteswest; - int tz_dsttime; -}; -extern struct vsyscall_gtod_data vsyscall_gtod_data; - extern int vclocks_used; static inline bool vclock_was_used(int vclock) { return READ_ONCE(vclocks_used) & (1 << vclock); } -static inline unsigned int gtod_read_begin(const struct vsyscall_gtod_data *s) -{ - unsigned int ret; - -repeat: - ret = READ_ONCE(s->seq); - if (unlikely(ret & 1)) { - cpu_relax(); - goto repeat; - } - smp_rmb(); - return ret; -} - -static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, - unsigned int start) -{ - smp_rmb(); - return unlikely(s->seq != start); -} - -static inline void gtod_write_begin(struct vsyscall_gtod_data *s) -{ - ++s->seq; - smp_wmb(); -} - -static inline void gtod_write_end(struct vsyscall_gtod_data *s) -{ - smp_wmb(); - ++s->seq; -} - #endif /* _ASM_X86_VGTOD_H */ diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index b986b2ca688a..ab60a71a8dcb 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h @@ -13,10 +13,12 @@ extern void set_vsyscall_pgtable_user_bits(pgd_t *root); * Called on instruction fetch fault in vsyscall page. * Returns true if handled. 
*/ -extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address); +extern bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address); #else static inline void map_vsyscall(void) {} -static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) +static inline bool emulate_vsyscall(unsigned long error_code, + struct pt_regs *regs, unsigned long address) { return false; } diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index e474f5c6e387..32f5d9a0b90e 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h @@ -32,19 +32,20 @@ extern char __vvar_page; #define DECLARE_VVAR(offset, type, name) \ - extern type vvar_ ## name __attribute__((visibility("hidden"))); + extern type vvar_ ## name[CS_BASES] \ + __attribute__((visibility("hidden"))); #define VVAR(name) (vvar_ ## name) #define DEFINE_VVAR(type, name) \ - type name \ + type name[CS_BASES] \ __attribute__((section(".vvar_" #name), aligned(16))) __visible #endif /* DECLARE_VVAR(offset, type, name) */ -DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) +DECLARE_VVAR(128, struct vdso_data, _vdso_data) #undef DECLARE_VVAR diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 60733f137e9a..c895df5482c5 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -29,6 +29,8 @@ #define XLF_EFI_HANDOVER_32 (1<<2) #define XLF_EFI_HANDOVER_64 (1<<3) #define XLF_EFI_KEXEC (1<<4) +#define XLF_5LEVEL (1<<5) +#define XLF_5LEVEL_ENABLED (1<<6) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index d6ab5b4d15e5..e901b0ab116f 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -378,10 +378,11 @@ struct kvm_sync_regs { struct kvm_vcpu_events events; }; -#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) -#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) -#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) -#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) +#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) +#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3) +#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ @@ -432,4 +433,14 @@ struct kvm_nested_state { } data; }; +/* for KVM_CAP_PMU_EVENT_FILTER */ +struct kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u64 events[0]; +}; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 19980ec1a316..2a8e0b6b9805 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,6 +29,8 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_PV_SEND_IPI 11 +#define KVM_FEATURE_POLL_CONTROL 12 +#define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_HINTS_REALTIME 0 @@ -47,6 +49,7 @@ #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define MSR_KVM_POLL_CONTROL 0x4b564d05 struct kvm_steal_time { __u64 steal; diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h index ac67bbea10ca..7c9d2bb3833b 100644 --- a/arch/x86/include/uapi/asm/perf_regs.h +++ 
b/arch/x86/include/uapi/asm/perf_regs.h @@ -52,4 +52,7 @@ enum perf_event_x86_regs { /* These include both GPRs and XMMX registers */ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2, }; + +#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1)) + #endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d213ec5c3766..f0b0c90dd398 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -146,7 +146,6 @@ #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 -#define VMX_ABORT_VMCS_CORRUPTED 3 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ce1b5cc360a2..3578ad248bc9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -30,7 +30,7 @@ KASAN_SANITIZE_paravirt.o := n OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y OBJECT_FILES_NON_STANDARD_test_nx.o := y -OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_paravirt_patch.o := y ifdef CONFIG_FRAME_POINTER OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y @@ -112,7 +112,7 @@ obj-$(CONFIG_AMD_NB) += amd_nb.o obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index a5e5484988fd..caf2edccbad2 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -64,6 +64,21 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, c->x86_stepping >= 0x0e)) flags->bm_check = 1; } + + if (c->x86_vendor == X86_VENDOR_ZHAOXIN) { + /* + * All Zhaoxin CPUs that support C3 share cache. + * And caches should not be flushed by software while + * entering C3 type state. + */ + flags->bm_check = 1; + /* + * On all recent Zhaoxin platforms, ARB_DISABLE is a nop. + * So, set bm_control to zero to indicate that ARB_DISABLE + * is not required while entering C3 type state. 
+ */ + flags->bm_control = 0; + } } EXPORT_SYMBOL(acpi_processor_power_init_bm_check); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 390596b761e3..ccd32013c47a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -14,6 +14,7 @@ #include <linux/kdebug.h> #include <linux/kprobes.h> #include <linux/mmu_context.h> +#include <linux/bsearch.h> #include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> @@ -277,7 +278,7 @@ static inline bool is_jmp(const u8 opcode) } static void __init_or_module -recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) +recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff) { u8 *next_rip, *tgt_rip; s32 n_dspl, o_dspl; @@ -286,7 +287,7 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) if (a->replacementlen != 5) return; - o_dspl = *(s32 *)(insnbuf + 1); + o_dspl = *(s32 *)(insn_buff + 1); /* next_rip of the replacement JMP */ next_rip = repl_insn + a->replacementlen; @@ -312,9 +313,9 @@ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf) two_byte_jmp: n_dspl -= 2; - insnbuf[0] = 0xeb; - insnbuf[1] = (s8)n_dspl; - add_nops(insnbuf + 2, 3); + insn_buff[0] = 0xeb; + insn_buff[1] = (s8)n_dspl; + add_nops(insn_buff + 2, 3); repl_len = 2; goto done; @@ -322,8 +323,8 @@ two_byte_jmp: five_byte_jmp: n_dspl -= 5; - insnbuf[0] = 0xe9; - *(s32 *)&insnbuf[1] = n_dspl; + insn_buff[0] = 0xe9; + *(s32 *)&insn_buff[1] = n_dspl; repl_len = 5; @@ -370,7 +371,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, { struct alt_instr *a; u8 *instr, *replacement; - u8 insnbuf[MAX_PATCH_LEN]; + u8 insn_buff[MAX_PATCH_LEN]; DPRINTK("alt table %px, -> %px", start, end); /* @@ -383,11 +384,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * order. */ for (a = start; a < end; a++) { - int insnbuf_sz = 0; + int insn_buff_sz = 0; instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - BUG_ON(a->instrlen > sizeof(insnbuf)); + BUG_ON(a->instrlen > sizeof(insn_buff)); BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32); if (!boot_cpu_has(a->cpuid)) { if (a->padlen > 1) @@ -405,8 +406,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement); - memcpy(insnbuf, replacement, a->replacementlen); - insnbuf_sz = a->replacementlen; + memcpy(insn_buff, replacement, a->replacementlen); + insn_buff_sz = a->replacementlen; /* * 0xe8 is a relative jump; fix the offset. @@ -414,24 +415,24 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, * Instruction length is checked before the opcode to avoid * accessing uninitialized bytes for zero-length replacements. 
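
The CALL fixup just below leans on how x86 encodes a rel32 CALL/JMP: the target is the address of the next instruction plus the signed 32-bit displacement. Copying the 5-byte instruction from the replacement site to the original site shifts that base address, so adding (replacement - instr) to the displacement keeps the absolute target unchanged. A worked example with invented addresses:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Invented addresses, purely for the arithmetic. */
        uint64_t replacement = 0xffffffff81000100ULL;  /* where the CALL was assembled */
        uint64_t instr       = 0xffffffff81234560ULL;  /* where it gets patched in     */
        uint64_t target      = 0xffffffff81000f00ULL;  /* absolute call target         */

        /* rel32 as encoded at the replacement site: target = next_insn + disp. */
        int32_t disp = (int32_t)((int64_t)target - (int64_t)(replacement + 5));

        /* The same adjustment apply_alternatives() performs. */
        disp += (int32_t)((int64_t)replacement - (int64_t)instr);

        /* Decoding at the patch site resolves to the original target again. */
        printf("resolved: %#llx, expected: %#llx\n",
               (unsigned long long)(instr + 5 + (int64_t)disp),
               (unsigned long long)target);
        return 0;
    }
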
*/ - if (a->replacementlen == 5 && *insnbuf == 0xe8) { - *(s32 *)(insnbuf + 1) += replacement - instr; + if (a->replacementlen == 5 && *insn_buff == 0xe8) { + *(s32 *)(insn_buff + 1) += replacement - instr; DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", - *(s32 *)(insnbuf + 1), - (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5); + *(s32 *)(insn_buff + 1), + (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5); } if (a->replacementlen && is_jmp(replacement[0])) - recompute_jump(a, instr, replacement, insnbuf); + recompute_jump(a, instr, replacement, insn_buff); if (a->instrlen > a->replacementlen) { - add_nops(insnbuf + a->replacementlen, + add_nops(insn_buff + a->replacementlen, a->instrlen - a->replacementlen); - insnbuf_sz += a->instrlen - a->replacementlen; + insn_buff_sz += a->instrlen - a->replacementlen; } - DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr); + DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr); - text_poke_early(instr, insnbuf, insnbuf_sz); + text_poke_early(instr, insn_buff, insn_buff_sz); } } @@ -593,33 +594,119 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start, struct paravirt_patch_site *end) { struct paravirt_patch_site *p; - char insnbuf[MAX_PATCH_LEN]; + char insn_buff[MAX_PATCH_LEN]; for (p = start; p < end; p++) { unsigned int used; BUG_ON(p->len > MAX_PATCH_LEN); /* prep the buffer with the original instructions */ - memcpy(insnbuf, p->instr, p->len); - used = pv_ops.init.patch(p->instrtype, insnbuf, - (unsigned long)p->instr, p->len); + memcpy(insn_buff, p->instr, p->len); + used = pv_ops.init.patch(p->type, insn_buff, (unsigned long)p->instr, p->len); BUG_ON(used > p->len); /* Pad the rest with nops */ - add_nops(insnbuf + used, p->len - used); - text_poke_early(p->instr, insnbuf, p->len); + add_nops(insn_buff + used, p->len - used); + text_poke_early(p->instr, insn_buff, p->len); } } extern struct paravirt_patch_site __start_parainstructions[], __stop_parainstructions[]; #endif /* CONFIG_PARAVIRT */ +/* + * Self-test for the INT3 based CALL emulation code. + * + * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up + * properly and that there is a stack gap between the INT3 frame and the + * previous context. Without this gap doing a virtual PUSH on the interrupted + * stack would corrupt the INT3 IRET frame. + * + * See entry_{32,64}.S for more details. + */ + +/* + * We define the int3_magic() function in assembly to control the calling + * convention such that we can 'call' it from assembly. 
+ */ + +extern void int3_magic(unsigned int *ptr); /* defined in asm */ + +asm ( +" .pushsection .init.text, \"ax\", @progbits\n" +" .type int3_magic, @function\n" +"int3_magic:\n" +" movl $1, (%" _ASM_ARG1 ")\n" +" ret\n" +" .size int3_magic, .-int3_magic\n" +" .popsection\n" +); + +extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */ + +static int __init +int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + if (!regs || user_mode(regs)) + return NOTIFY_DONE; + + if (val != DIE_INT3) + return NOTIFY_DONE; + + if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip) + return NOTIFY_DONE; + + int3_emulate_call(regs, (unsigned long)&int3_magic); + return NOTIFY_STOP; +} + +static void __init int3_selftest(void) +{ + static __initdata struct notifier_block int3_exception_nb = { + .notifier_call = int3_exception_notify, + .priority = INT_MAX-1, /* last */ + }; + unsigned int val = 0; + + BUG_ON(register_die_notifier(&int3_exception_nb)); + + /* + * Basically: int3_magic(&val); but really complicated :-) + * + * Stick the address of the INT3 instruction into int3_selftest_ip, + * then trigger the INT3, padded with NOPs to match a CALL instruction + * length. + */ + asm volatile ("1: int3; nop; nop; nop; nop\n\t" + ".pushsection .init.data,\"aw\"\n\t" + ".align " __ASM_SEL(4, 8) "\n\t" + ".type int3_selftest_ip, @object\n\t" + ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t" + "int3_selftest_ip:\n\t" + __ASM_SEL(.long, .quad) " 1b\n\t" + ".popsection\n\t" + : ASM_CALL_CONSTRAINT + : __ASM_SEL_RAW(a, D) (&val) + : "memory"); + + BUG_ON(val != 1); + + unregister_die_notifier(&int3_exception_nb); +} + void __init alternative_instructions(void) { - /* The patching is not fully atomic, so try to avoid local interruptions - that might execute the to be patched code. - Other CPUs are not running. */ + int3_selftest(); + + /* + * The patching is not fully atomic, so try to avoid local + * interruptions that might execute the to be patched code. + * Other CPUs are not running. + */ stop_nmi(); /* @@ -644,10 +731,11 @@ void __init alternative_instructions(void) _text, _etext); } - if (!uniproc_patched || num_possible_cpus() == 1) + if (!uniproc_patched || num_possible_cpus() == 1) { free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); + } #endif apply_paravirt(__parainstructions, __parainstructions_end); @@ -848,81 +936,133 @@ static void do_sync_core(void *info) sync_core(); } -static bool bp_patching_in_progress; -static void *bp_int3_handler, *bp_int3_addr; +static struct bp_patching_desc { + struct text_poke_loc *vec; + int nr_entries; +} bp_patching; + +static int patch_cmp(const void *key, const void *elt) +{ + struct text_poke_loc *tp = (struct text_poke_loc *) elt; + + if (key < tp->addr) + return -1; + if (key > tp->addr) + return 1; + return 0; +} +NOKPROBE_SYMBOL(patch_cmp); int poke_int3_handler(struct pt_regs *regs) { + struct text_poke_loc *tp; + unsigned char int3 = 0xcc; + void *ip; + /* * Having observed our INT3 instruction, we now must observe - * bp_patching_in_progress. + * bp_patching.nr_entries. * - * in_progress = TRUE INT3 + * nr_entries != 0 INT3 * WMB RMB - * write INT3 if (in_progress) + * write INT3 if (nr_entries) * - * Idem for bp_int3_handler. + * Idem for other elements in bp_patching. 
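
The barrier diagram above is the usual publish/consume pairing: the patching side fills in bp_patching.vec and bp_patching.nr_entries, issues a write barrier, and only then plants the INT3 bytes; the trap handler, having hit an INT3, issues the read barrier below before trusting nr_entries and vec. A hedged sketch of the same pairing using C11 release/acquire in place of the kernel's smp_wmb()/smp_rmb(); the names are invented, and the INT3 byte itself is collapsed into the flag here.

    #include <stdatomic.h>
    #include <stddef.h>

    struct patch_desc {
        void **vec;                 /* payload published by the patcher */
        _Atomic int nr_entries;     /* stands in for "an INT3 is armed" */
    };

    static struct patch_desc desc;

    /* Patcher: publish the payload, then make it observable (smp_wmb analogue). */
    void publish(void **vec, int n)
    {
        desc.vec = vec;
        atomic_store_explicit(&desc.nr_entries, n, memory_order_release);
    }

    /* Handler: observe the flag first, then the payload (smp_rmb analogue). */
    void **consume(int *n)
    {
        int entries = atomic_load_explicit(&desc.nr_entries, memory_order_acquire);

        if (!entries)
            return NULL;            /* nothing being patched right now */
        *n = entries;
        return desc.vec;            /* the published vector is guaranteed visible */
    }
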
*/ smp_rmb(); - if (likely(!bp_patching_in_progress)) + if (likely(!bp_patching.nr_entries)) return 0; - if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr) + if (user_mode(regs)) return 0; - /* set up the specified breakpoint handler */ - regs->ip = (unsigned long) bp_int3_handler; + /* + * Discount the sizeof(int3). See text_poke_bp_batch(). + */ + ip = (void *) regs->ip - sizeof(int3); + + /* + * Skip the binary search if there is a single member in the vector. + */ + if (unlikely(bp_patching.nr_entries > 1)) { + tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries, + sizeof(struct text_poke_loc), + patch_cmp); + if (!tp) + return 0; + } else { + tp = bp_patching.vec; + if (tp->addr != ip) + return 0; + } + + /* set up the specified breakpoint detour */ + regs->ip = (unsigned long) tp->detour; return 1; } NOKPROBE_SYMBOL(poke_int3_handler); /** - * text_poke_bp() -- update instructions on live kernel on SMP - * @addr: address to patch - * @opcode: opcode of new instruction - * @len: length to copy - * @handler: address to jump to when the temporary breakpoint is hit + * text_poke_bp_batch() -- update instructions on live kernel on SMP + * @tp: vector of instructions to patch + * @nr_entries: number of entries in the vector * * Modify multi-byte instruction by using int3 breakpoint on SMP. * We completely avoid stop_machine() here, and achieve the * synchronization using int3 breakpoint. * * The way it is done: - * - add a int3 trap to the address that will be patched + * - For each entry in the vector: + * - add a int3 trap to the address that will be patched * - sync cores - * - update all but the first byte of the patched range + * - For each entry in the vector: + * - update all but the first byte of the patched range * - sync cores - * - replace the first byte (int3) by the first byte of - * replacing opcode + * - For each entry in the vector: + * - replace the first byte (int3) by the first byte of + * replacing opcode * - sync cores */ -void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries) { + int patched_all_but_first = 0; unsigned char int3 = 0xcc; - - bp_int3_handler = handler; - bp_int3_addr = (u8 *)addr + sizeof(int3); - bp_patching_in_progress = true; + unsigned int i; lockdep_assert_held(&text_mutex); + bp_patching.vec = tp; + bp_patching.nr_entries = nr_entries; + /* * Corresponding read barrier in int3 notifier for making sure the - * in_progress and handler are correctly ordered wrt. patching. + * nr_entries and handler are correctly ordered wrt. patching. */ smp_wmb(); - text_poke(addr, &int3, sizeof(int3)); + /* + * First step: add a int3 trap to the address that will be patched. + */ + for (i = 0; i < nr_entries; i++) + text_poke(tp[i].addr, &int3, sizeof(int3)); on_each_cpu(do_sync_core, NULL, 1); - if (len - sizeof(int3) > 0) { - /* patch all but the first byte */ - text_poke((char *)addr + sizeof(int3), - (const char *) opcode + sizeof(int3), - len - sizeof(int3)); + /* + * Second step: update all but the first byte of the patched range. + */ + for (i = 0; i < nr_entries; i++) { + if (tp[i].len - sizeof(int3) > 0) { + text_poke((char *)tp[i].addr + sizeof(int3), + (const char *)tp[i].opcode + sizeof(int3), + tp[i].len - sizeof(int3)); + patched_all_but_first++; + } + } + + if (patched_all_but_first) { /* * According to Intel, this core syncing is very likely * not necessary and we'd be safe even without it. 
But @@ -931,14 +1071,47 @@ void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) on_each_cpu(do_sync_core, NULL, 1); } - /* patch the first byte */ - text_poke(addr, opcode, sizeof(int3)); + /* + * Third step: replace the first byte (int3) by the first byte of + * replacing opcode. + */ + for (i = 0; i < nr_entries; i++) + text_poke(tp[i].addr, tp[i].opcode, sizeof(int3)); on_each_cpu(do_sync_core, NULL, 1); /* * sync_core() implies an smp_mb() and orders this store against * the writing of the new instruction. */ - bp_patching_in_progress = false; + bp_patching.vec = NULL; + bp_patching.nr_entries = 0; } +/** + * text_poke_bp() -- update instructions on live kernel on SMP + * @addr: address to patch + * @opcode: opcode of new instruction + * @len: length to copy + * @handler: address to jump to when the temporary breakpoint is hit + * + * Update a single instruction with the vector in the stack, avoiding + * dynamically allocated memory. This function should be used when it is + * not possible to allocate memory. + */ +void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +{ + struct text_poke_loc tp = { + .detour = handler, + .addr = addr, + .len = len, + }; + + if (len > POKE_MAX_OPCODE_SIZE) { + WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE); + return; + } + + memcpy((void *)tp.opcode, opcode, len); + + text_poke_bp_batch(&tp, 1); +} diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 002aedc69393..d63e63b7d1d9 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -72,7 +72,7 @@ static const struct pci_device_id hygon_root_ids[] = { {} }; -const struct pci_device_id hygon_nb_misc_ids[] = { +static const struct pci_device_id hygon_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) }, {} }; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 177aa8ef2afa..1bd91cb7b320 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -195,7 +195,7 @@ static struct resource lapic_resource = { .flags = IORESOURCE_MEM | IORESOURCE_BUSY, }; -unsigned int lapic_timer_frequency = 0; +unsigned int lapic_timer_period = 0; static void apic_pm_activate(void); @@ -501,7 +501,7 @@ lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot) if (evt->features & CLOCK_EVT_FEAT_DUMMY) return 0; - __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1); + __setup_APIC_LVTT(lapic_timer_period, oneshot, 1); return 0; } @@ -805,11 +805,11 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) static int __init lapic_init_clockevent(void) { - if (!lapic_timer_frequency) + if (!lapic_timer_period) return -1; /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, + lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR, TICK_NSEC, lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); @@ -821,6 +821,33 @@ static int __init lapic_init_clockevent(void) return 0; } +bool __init apic_needs_pit(void) +{ + /* + * If the frequencies are not known, PIT is required for both TSC + * and apic timer calibration. + */ + if (!tsc_khz || !cpu_khz) + return true; + + /* Is there an APIC at all? 
*/ + if (!boot_cpu_has(X86_FEATURE_APIC)) + return true; + + /* Deadline timer is based on TSC so no further PIT action required */ + if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return false; + + /* APIC timer disabled? */ + if (disable_apic_timer) + return true; + /* + * The APIC timer frequency is known already, no PIT calibration + * required. If unknown, let the PIT be initialized. + */ + return lapic_timer_period == 0; +} + static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); @@ -839,7 +866,7 @@ static int __init calibrate_APIC_clock(void) */ if (!lapic_init_clockevent()) { apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", - lapic_timer_frequency); + lapic_timer_period); /* * Direct calibration methods must have an always running * local APIC timer, no need for broadcast timer. @@ -884,13 +911,13 @@ static int __init calibrate_APIC_clock(void) pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, &delta, &deltatsc); - lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; lapic_init_clockevent(); apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - lapic_timer_frequency); + lapic_timer_period); if (boot_cpu_has(X86_FEATURE_TSC)) { apic_printk(APIC_VERBOSE, "..... CPU clock speed is " @@ -901,13 +928,13 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "..... host bus clock speed is " "%u.%04u MHz.\n", - lapic_timer_frequency / (1000000 / HZ), - lapic_timer_frequency % (1000000 / HZ)); + lapic_timer_period / (1000000 / HZ), + lapic_timer_period % (1000000 / HZ)); /* * Do a sanity check on the APIC calibration result */ - if (lapic_timer_frequency < (1000000 / HZ)) { + if (lapic_timer_period < (1000000 / HZ)) { local_irq_enable(); pr_warning("APIC frequency too slow, disabling apic timer\n"); return -1; @@ -1351,6 +1378,8 @@ void __init init_bsp_APIC(void) apic_write(APIC_LVT1, value); } +static void __init apic_bsp_setup(bool upmode); + /* Init the interrupt delivery mode for the BSP */ void __init apic_intr_mode_init(void) { @@ -1464,7 +1493,8 @@ static void apic_pending_intr_clear(void) if (queued) { if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { ntsc = rdtsc(); - max_loops = (cpu_khz << 10) - (ntsc - tsc); + max_loops = (long long)cpu_khz << 10; + max_loops -= ntsc - tsc; } else { max_loops--; } @@ -2040,21 +2070,32 @@ __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) entering_irq(); trace_spurious_apic_entry(vector); + inc_irq_stat(irq_spurious_count); + + /* + * If this is a spurious interrupt then do not acknowledge + */ + if (vector == SPURIOUS_APIC_VECTOR) { + /* See SDM vol 3 */ + pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n", + smp_processor_id()); + goto out; + } + /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. + * If it is a vectored one, verify it's set in the ISR. If set, + * acknowledge it. */ v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1)); - if (v & (1 << (vector & 0x1f))) + if (v & (1 << (vector & 0x1f))) { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. 
Acked\n", + vector, smp_processor_id()); ack_APIC_irq(); - - inc_irq_stat(irq_spurious_count); - - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - pr_info("spurious APIC interrupt through vector %02x on CPU#%d, " - "should never happen.\n", vector, smp_processor_id()); - + } else { + pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n", + vector, smp_processor_id()); + } +out: trace_spurious_apic_exit(vector); exiting_irq(); } @@ -2415,11 +2456,8 @@ static void __init apic_bsp_up_setup(void) /** * apic_bsp_setup - Setup function for local apic and io-apic * @upmode: Force UP mode (for APIC_init_uniprocessor) - * - * Returns: - * apic_id of BSP APIC */ -void __init apic_bsp_setup(bool upmode) +static void __init apic_bsp_setup(bool upmode) { connect_bsp_APIC(); if (upmode) diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index bf083c3f1d73..bbdca603f94a 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -78,7 +78,7 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) int cpu = smp_processor_id(); if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); + __clear_bit(cpu, &mask); _flat_send_IPI_mask(mask, vector); } @@ -92,7 +92,7 @@ static void flat_send_IPI_allbutself(int vector) unsigned long mask = cpumask_bits(cpu_online_mask)[0]; if (cpu < BITS_PER_LONG) - clear_bit(cpu, &mask); + __clear_bit(cpu, &mask); _flat_send_IPI_mask(mask, vector); } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53aa234a6803..c7bb6c69f21c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -58,6 +58,7 @@ #include <asm/acpi.h> #include <asm/dma.h> #include <asm/timer.h> +#include <asm/time.h> #include <asm/i8259.h> #include <asm/setup.h> #include <asm/irq_remapping.h> @@ -1893,6 +1894,50 @@ static int ioapic_set_affinity(struct irq_data *irq_data, return ret; } +/* + * Interrupt shutdown masks the ioapic pin, but the interrupt might already + * be in flight, but not yet serviced by the target CPU. That means + * __synchronize_hardirq() would return and claim that everything is calmed + * down. So free_irq() would proceed and deactivate the interrupt and free + * resources. + * + * Once the target CPU comes around to service it it will find a cleared + * vector and complain. While the spurious interrupt is harmless, the full + * release of resources might prevent the interrupt from being acknowledged + * which keeps the hardware in a weird state. + * + * Verify that the corresponding Remote-IRR bits are clear. + */ +static int ioapic_irq_get_chip_state(struct irq_data *irqd, + enum irqchip_irq_state which, + bool *state) +{ + struct mp_chip_data *mcd = irqd->chip_data; + struct IO_APIC_route_entry rentry; + struct irq_pin_list *p; + + if (which != IRQCHIP_STATE_ACTIVE) + return -EINVAL; + + *state = false; + raw_spin_lock(&ioapic_lock); + for_each_irq_pin(p, mcd->irq_2_pin) { + rentry = __ioapic_read_entry(p->apic, p->pin); + /* + * The remote IRR is only valid in level trigger mode. It's + * meaning is undefined for edge triggered interrupts and + * irrelevant because the IO-APIC treats them as fire and + * forget. 
+ */ + if (rentry.irr && rentry.trigger) { + *state = true; + break; + } + } + raw_spin_unlock(&ioapic_lock); + return 0; +} + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .irq_startup = startup_ioapic_irq, @@ -1902,6 +1947,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1914,6 +1960,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_get_irqchip_state = ioapic_irq_get_chip_state, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -2083,6 +2130,9 @@ static inline void __init check_timer(void) unsigned long flags; int no_pin1 = 0; + if (!global_clock_event) + return; + local_irq_save(flags); /* diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index dad0dd759de2..7f7533462474 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -370,14 +370,14 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id) return d; } -int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev, +int hpet_assign_irq(struct irq_domain *domain, struct hpet_channel *hc, int dev_num) { struct irq_alloc_info info; init_irq_alloc_info(&info, NULL); info.type = X86_IRQ_ALLOC_TYPE_HPET; - info.hpet_data = dev; + info.hpet_data = hc; info.hpet_id = hpet_dev_id(domain); info.hpet_index = dev_num; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index e7cb78aed644..fdacb864c3dd 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -340,7 +340,7 @@ static void clear_irq_vector(struct irq_data *irqd) trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, apicd->prev_cpu); - per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; + per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN; irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); apicd->vector = 0; @@ -349,7 +349,7 @@ static void clear_irq_vector(struct irq_data *irqd) if (!vector) return; - per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; + per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN; irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); apicd->prev_vector = 0; apicd->move_in_progress = 0; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 7685444a106b..609e499387a1 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -50,7 +50,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) cpumask_copy(tmpmsk, mask); /* If IPI should not be sent to self, clear current CPU */ if (apic_dest != APIC_DEST_ALLINC) - cpumask_clear_cpu(smp_processor_id(), tmpmsk); + __cpumask_clear_cpu(smp_processor_id(), tmpmsk); /* Collapse cpus in a cluster so a single IPI per cluster is sent */ for_each_cpu(cpu, tmpmsk) { diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 168543d077d7..da64452584b0 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -38,7 +38,6 @@ static void __used common(void) #endif BLANK(); - OFFSET(TASK_TI_flags, task_struct, thread_info.flags); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); BLANK(); diff --git a/arch/x86/kernel/cpu/Makefile 
b/arch/x86/kernel/cpu/Makefile index 5102bf7c8192..d7a1e5a9331c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -24,6 +24,7 @@ obj-y += match.o obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o +obj-y += umwait.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o @@ -38,6 +39,7 @@ obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o +obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin.o obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ @@ -47,6 +49,7 @@ obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o +obj-$(CONFIG_ACRN_GUEST) += acrn.o ifdef CONFIG_X86_FEATURE_NAMES quiet_cmd_mkcapflags = MKCAP $@ @@ -54,8 +57,7 @@ quiet_cmd_mkcapflags = MKCAP $@ cpufeature = $(src)/../../include/asm/cpufeatures.h -targets += capflags.c $(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE $(call if_changed,mkcapflags) endif -clean-files += capflags.c +targets += capflags.c diff --git a/arch/x86/kernel/cpu/acrn.c b/arch/x86/kernel/cpu/acrn.c new file mode 100644 index 000000000000..676022e71791 --- /dev/null +++ b/arch/x86/kernel/cpu/acrn.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ACRN detection support + * + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * + * Jason Chen CJ <jason.cj.chen@intel.com> + * Zhao Yakui <yakui.zhao@intel.com> + * + */ + +#include <linux/interrupt.h> +#include <asm/acrn.h> +#include <asm/apic.h> +#include <asm/desc.h> +#include <asm/hypervisor.h> +#include <asm/irq_regs.h> + +static uint32_t __init acrn_detect(void) +{ + return hypervisor_cpuid_base("ACRNACRNACRN\0\0", 0); +} + +static void __init acrn_init_platform(void) +{ + /* Setup the IDT for ACRN hypervisor callback */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, acrn_hv_callback_vector); +} + +static bool acrn_x2apic_available(void) +{ + /* + * x2apic is not supported for now. Future enablement will have to check + * X86_FEATURE_X2APIC to determine whether x2apic is supported in the + * guest. + */ + return false; +} + +static void (*acrn_intr_handler)(void); + +__visible void __irq_entry acrn_hv_vector_handler(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* + * The hypervisor requires that the APIC EOI should be acked. + * If the APIC EOI is not acked, the APIC ISR bit for the + * HYPERVISOR_CALLBACK_VECTOR will not be cleared and then it + * will block the interrupt whose vector is lower than + * HYPERVISOR_CALLBACK_VECTOR. 
+ */ + entering_ack_irq(); + inc_irq_stat(irq_hv_callback_count); + + if (acrn_intr_handler) + acrn_intr_handler(); + + exiting_irq(); + set_irq_regs(old_regs); +} + +const __initconst struct hypervisor_x86 x86_hyper_acrn = { + .name = "ACRN", + .detect = acrn_detect, + .type = X86_HYPER_ACRN, + .init.init_platform = acrn_init_platform, + .init.x2apic_available = acrn_x2apic_available, +}; diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index e71a6ff8a67e..e2f319dc992d 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -13,6 +13,7 @@ #include <linux/percpu.h> #include <linux/cpufreq.h> #include <linux/smp.h> +#include <linux/sched/isolation.h> #include "cpu.h" @@ -85,6 +86,9 @@ unsigned int aperfmperf_get_khz(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + return 0; + aperfmperf_snapshot_cpu(cpu, ktime_get(), true); return per_cpu(samples.khz, cpu); } @@ -101,9 +105,12 @@ void arch_freq_prepare_all(void) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return; - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + continue; if (!aperfmperf_snapshot_cpu(cpu, now, false)) wait = true; + } if (wait) msleep(APERFMPERF_REFRESH_DELAY_MS); @@ -117,6 +124,9 @@ unsigned int arch_freq_get_on_cpu(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; + if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + return 0; + if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) return per_cpu(samples.khz, cpu); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03b4cc0ec3a7..66ca906aa790 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -836,6 +836,16 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) } /* + * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper + * bit in the mask to allow guests to use the mitigation even in the + * case where the host does not enable it. + */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + } + + /* * We have three CPU feature flags that are in play here: * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass @@ -852,7 +862,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } } diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 395d46f78582..c7503be92f35 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -658,8 +658,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) if (c->x86 < 0x17) { /* LLC is at the node level. */ per_cpu(cpu_llc_id, cpu) = node_id; - } else if (c->x86 == 0x17 && - c->x86_model >= 0 && c->x86_model <= 0x1F) { + } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. * Core complex ID is ApicId[3] for these processors. 
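
The hunk is cut off at this point, so the line that actually derives the LLC id is not visible here; the sketch below only illustrates what the comment states, namely that bit 3 of the APIC ID selects the core complex on these family 17h models, with the LLC shared per core complex. The loop and the id values are invented.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        for (uint32_t apicid = 0; apicid < 16; apicid++) {
            uint32_t ccx = (apicid >> 3) & 1;   /* ApicId[3]: which core complex */
            printf("apicid %2u -> core complex %u\n", apicid, ccx);
        }
        return 0;
    }
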
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2c57fffebf9b..11472178e17f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -366,6 +366,77 @@ out: cr4_clear_bits(X86_CR4_UMIP); } +static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +static unsigned long cr4_pinned_bits __ro_after_init; + +void native_write_cr0(unsigned long val) +{ + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { + bits_missing = X86_CR0_WP; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); + } +} +EXPORT_SYMBOL(native_write_cr0); + +void native_write_cr4(unsigned long val) +{ + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) { + bits_missing = ~val & cr4_pinned_bits; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n", + bits_missing); + } +} +EXPORT_SYMBOL(native_write_cr4); + +void cr4_init(void) +{ + unsigned long cr4 = __read_cr4(); + + if (boot_cpu_has(X86_FEATURE_PCID)) + cr4 |= X86_CR4_PCIDE; + if (static_branch_likely(&cr_pinning)) + cr4 |= cr4_pinned_bits; + + __write_cr4(cr4); + + /* Initialize cr4 shadow for this CPU. */ + this_cpu_write(cpu_tlbstate.cr4, cr4); +} + +/* + * Once CPU feature detection is finished (and boot params have been + * parsed), record any of the sensitive CR bits that are set, and + * enable CR pinning. + */ +static void __init setup_cr_pinning(void) +{ + unsigned long mask; + + mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP); + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask; + static_key_enable(&cr_pinning.key); +} + /* * Protection Keys are not available in 32-bit mode. 
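
The native_write_cr0()/native_write_cr4() additions above share one shape: perform the register write, check whether any pinned bit was lost, rewrite with the bits restored, and only then warn. A stripped-down sketch of that check-and-restore loop, with an ordinary variable standing in for the control register and an invented pinned bit:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t fake_cr;                         /* stand-in for CR0/CR4 */
    static const uint64_t pinned_bits = 1ULL << 16;  /* e.g. a WP-like bit   */

    static void pinned_write(uint64_t val)
    {
        uint64_t missing = 0;

    again:
        fake_cr = val;                               /* the actual register write */

        if ((val & pinned_bits) != pinned_bits) {
            missing = pinned_bits & ~val;            /* pinned bits that got lost  */
            val |= missing;
            goto again;                              /* rewrite with bits restored */
        }
        if (missing)
            fprintf(stderr, "pinned bits went missing: %#llx\n",
                    (unsigned long long)missing);
    }

    int main(void)
    {
        pinned_write(0);                             /* try to clear the pinned bit */
        printf("register ends up as %#llx\n", (unsigned long long)fake_cr);
        return 0;
    }

Warning only after the bits have been restored mirrors the ordering in the patch, keeping the window with the sensitive bits cleared as short as possible.
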
*/ @@ -801,6 +872,30 @@ static void init_speculation_control(struct cpuinfo_x86 *c) } } +static void init_cqm(struct cpuinfo_x86 *c) +{ + if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { + c->x86_cache_max_rmid = -1; + c->x86_cache_occ_scale = -1; + return; + } + + /* will be overridden if occupancy monitoring exists */ + c->x86_cache_max_rmid = cpuid_ebx(0xf); + + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || + cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { + u32 eax, ebx, ecx, edx; + + /* QoS sub-leaf, EAX=0Fh, ECX=1 */ + cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); + + c->x86_cache_max_rmid = ecx; + c->x86_cache_occ_scale = ebx; + } +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -823,6 +918,12 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_7_0_EBX] = ebx; c->x86_capability[CPUID_7_ECX] = ecx; c->x86_capability[CPUID_7_EDX] = edx; + + /* Check valid sub-leaf index before accessing it */ + if (eax >= 1) { + cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx); + c->x86_capability[CPUID_7_1_EAX] = eax; + } } /* Extended state features: level 0x0000000d */ @@ -832,33 +933,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_D_1_EAX] = eax; } - /* Additional Intel-defined flags: level 0x0000000F */ - if (c->cpuid_level >= 0x0000000F) { - - /* QoS sub-leaf, EAX=0Fh, ECX=0 */ - cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_F_0_EDX] = edx; - - if (cpu_has(c, X86_FEATURE_CQM_LLC)) { - /* will be overridden if occupancy monitoring exists */ - c->x86_cache_max_rmid = ebx; - - /* QoS sub-leaf, EAX=0Fh, ECX=1 */ - cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); - c->x86_capability[CPUID_F_1_EDX] = edx; - - if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) || - ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) || - (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) { - c->x86_cache_max_rmid = ecx; - c->x86_cache_occ_scale = ebx; - } - } else { - c->x86_cache_max_rmid = -1; - c->x86_cache_occ_scale = -1; - } - } - /* AMD-defined flags: level 0x80000001 */ eax = cpuid_eax(0x80000000); c->extended_cpuid_level = eax; @@ -889,6 +963,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); + init_cqm(c); /* * Clear/Set all flags overridden by options, after probe. @@ -1299,6 +1374,7 @@ static void validate_apic_and_package_id(struct cpuinfo_x86 *c) cpu, apicid, c->initial_apicid); } BUG_ON(topology_update_package_map(c->phys_proc_id, cpu)); + BUG_ON(topology_update_die_map(c->cpu_die_id, cpu)); #else c->logical_proc_id = 0; #endif @@ -1464,6 +1540,7 @@ void __init identify_boot_cpu(void) enable_sep_cpu(); #endif cpu_detect_tlb(&boot_cpu_data); + setup_cr_pinning(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) @@ -1698,12 +1775,6 @@ void cpu_init(void) wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); - if (cpu) load_ucode_ap(); @@ -1798,12 +1869,6 @@ void cpu_init(void) wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. 
- */ - cr4_init_shadow(); - show_ucode_info_early(); pr_info("Initializing CPU#%d\n", cpu); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 2c0bd38a44ab..b5353244749b 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -20,6 +20,7 @@ struct cpuid_dep { * but it's difficult to tell that to the init reference checker. */ static const struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_FXSR, X86_FEATURE_FPU }, { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, @@ -27,7 +28,11 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, + { X86_FEATURE_CMOV, X86_FEATURE_FXSR }, + { X86_FEATURE_MMX, X86_FEATURE_FXSR }, + { X86_FEATURE_MMXEXT, X86_FEATURE_MMX }, { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, + { X86_FEATURE_XSAVE, X86_FEATURE_FXSR }, { X86_FEATURE_XMM, X86_FEATURE_FXSR }, { X86_FEATURE_XMM2, X86_FEATURE_XMM }, { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, @@ -59,6 +64,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, + { X86_FEATURE_CQM_OCCUP_LLC, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_TOTAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_CQM_MBM_LOCAL, X86_FEATURE_CQM_LLC }, + { X86_FEATURE_AVX512_BF16, X86_FEATURE_AVX512VL }, {} }; diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 479ca4728de0..87e39ad8d873 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -32,6 +32,7 @@ extern const struct hypervisor_x86 x86_hyper_xen_pv; extern const struct hypervisor_x86 x86_hyper_xen_hvm; extern const struct hypervisor_x86 x86_hyper_kvm; extern const struct hypervisor_x86 x86_hyper_jailhouse; +extern const struct hypervisor_x86 x86_hyper_acrn; static const __initconst struct hypervisor_x86 * const hypervisors[] = { @@ -49,6 +50,9 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = #ifdef CONFIG_JAILHOUSE_GUEST &x86_hyper_jailhouse, #endif +#ifdef CONFIG_ACRN_GUEST + &x86_hyper_acrn, +#endif }; enum x86_hypervisor_type x86_hyper_type; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index f17c1a714779..8d6d92ebeb54 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -66,6 +66,32 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) } } +/* + * Processors which have self-snooping capability can handle conflicting + * memory type across CPUs by snooping its own cache. However, there exists + * CPU models in which having conflicting memory types still leads to + * unpredictable behavior, machine check errors, or hangs. Clear this + * feature to prevent its use on machines with known erratas. 
+ */ +static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) +{ + switch (c->x86_model) { + case INTEL_FAM6_CORE_YONAH: + case INTEL_FAM6_CORE2_MEROM: + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_G: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_SANDYBRIDGE: + setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); + } +} + static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) @@ -304,6 +330,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) } check_mpx_erratum(c); + check_memory_type_self_snoop_errata(c); /* * Get the number of SMT siblings early from the extended topology diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 785050af85e5..6ea7fdc82f3c 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -99,11 +99,6 @@ static struct smca_bank_name smca_names[] = { [SMCA_PCIE] = { "pcie", "PCI Express Unit" }, }; -static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = -{ - [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 } -}; - static const char *smca_get_name(enum smca_bank_types t) { if (t >= N_SMCA_BANK_TYPES) @@ -197,6 +192,9 @@ static char buf_mcatype[MAX_MCATYPE_NAME_LEN]; static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ +/* Map of banks that have more than MCA_MISC0 available. */ +static DEFINE_PER_CPU(u32, smca_misc_banks_map); + static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); @@ -206,6 +204,28 @@ static void default_deferred_error_interrupt(void) } void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; +static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) +{ + u32 low, high; + + /* + * For SMCA enabled processors, BLKPTR field of the first MISC register + * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). + */ + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + return; + + if (!(low & MCI_CONFIG_MCAX)) + return; + + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high)) + return; + + if (low & MASK_BLKPTR_LO) + per_cpu(smca_misc_banks_map, cpu) |= BIT(bank); + +} + static void smca_configure(unsigned int bank, unsigned int cpu) { unsigned int i, hwid_mcatype; @@ -243,6 +263,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu) wrmsr(smca_config, low, high); } + smca_set_misc_banks_map(bank, cpu); + /* Return early if this bank was already initialized. */ if (smca_banks[bank].hwid) return; @@ -453,50 +475,29 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } -static u32 smca_get_block_address(unsigned int bank, unsigned int block) +static u32 smca_get_block_address(unsigned int bank, unsigned int block, + unsigned int cpu) { - u32 low, high; - u32 addr = 0; - - if (smca_get_bank_type(bank) == SMCA_RESERVED) - return addr; - if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); - /* Check our cache first: */ - if (smca_bank_addrs[bank][block] != -1) - return smca_bank_addrs[bank][block]; - - /* - * For SMCA enabled processors, BLKPTR field of the first MISC register - * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). 
- */ - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - goto out; - - if (!(low & MCI_CONFIG_MCAX)) - goto out; - - if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && - (low & MASK_BLKPTR_LO)) - addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank))) + return 0; -out: - smca_bank_addrs[bank][block] = addr; - return addr; + return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); } static u32 get_block_address(u32 current_addr, u32 low, u32 high, - unsigned int bank, unsigned int block) + unsigned int bank, unsigned int block, + unsigned int cpu) { u32 addr = 0, offset = 0; - if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) + if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return addr; if (mce_flags.smca) - return smca_get_block_address(bank, block); + return smca_get_block_address(bank, block, cpu); /* Fall back to method we used for older processors: */ switch (block) { @@ -624,18 +625,19 @@ void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { - u32 low = 0, high = 0, address = 0; unsigned int bank, block, cpu = smp_processor_id(); + u32 low = 0, high = 0, address = 0; int offset = -1; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); disable_err_thresholding(c, bank); for (block = 0; block < NR_BLOCKS; ++block) { - address = get_block_address(address, low, high, bank, block); + address = get_block_address(address, low, high, bank, block, cpu); if (!address) break; @@ -973,7 +975,7 @@ static void amd_deferred_error_interrupt(void) { unsigned int bank; - for (bank = 0; bank < mca_cfg.banks; ++bank) + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) log_error_deferred(bank); } @@ -1014,7 +1016,7 @@ static void amd_threshold_interrupt(void) struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL; unsigned int bank, cpu = smp_processor_id(); - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1201,7 +1203,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, u32 low, high; int err; - if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) + if ((bank >= per_cpu(mce_num_banks, cpu)) || (block >= NR_BLOCKS)) return 0; if (rdmsr_safe_on_cpu(cpu, address, &low, &high)) @@ -1252,7 +1254,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (err) goto out_free; recurse: - address = get_block_address(address, low, high, bank, ++block); + address = get_block_address(address, low, high, bank, ++block, cpu); if (!address) return 0; @@ -1435,7 +1437,7 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for (bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; threshold_remove_bank(cpu, bank); @@ -1456,14 +1458,14 @@ int mce_threshold_create_device(unsigned int cpu) if (bp) return 0; - bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *), + bp = kcalloc(per_cpu(mce_num_banks, cpu), sizeof(struct threshold_bank *), GFP_KERNEL); if (!bp) return -ENOMEM; per_cpu(threshold_banks, cpu) = bp; - for (bank = 0; bank < mca_cfg.banks; ++bank) { + for 
(bank = 0; bank < per_cpu(mce_num_banks, cpu); ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; err = threshold_create_bank(cpu, bank); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 282916f3b8d8..743370ee4983 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -65,7 +65,23 @@ static DEFINE_MUTEX(mce_sysfs_mutex); DEFINE_PER_CPU(unsigned, mce_exception_count); -struct mce_bank *mce_banks __read_mostly; +DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); + +struct mce_bank { + u64 ctl; /* subevents to enable */ + bool init; /* initialise bank? */ +}; +static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array); + +#define ATTR_LEN 16 +/* One object for each MCE bank, shared by all CPUs */ +struct mce_bank_dev { + struct device_attribute attr; /* device attribute */ + char attrname[ATTR_LEN]; /* attribute name */ + u8 bank; /* bank number */ +}; +static struct mce_bank_dev mce_bank_devs[MAX_NR_BANKS]; + struct mce_vendor_flags mce_flags __read_mostly; struct mca_config mca_cfg __read_mostly = { @@ -675,6 +691,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); */ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); bool error_seen = false; struct mce m; int i; @@ -686,7 +703,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) if (flags & MCP_TIMESTAMP) m.tsc = rdtsc(); - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) continue; @@ -788,7 +805,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, char *tmp; int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { m->status = mce_rdmsrl(msr_ops.status(i)); if (!(m->status & MCI_STATUS_VAL)) continue; @@ -1068,7 +1085,7 @@ static void mce_clear_state(unsigned long *toclear) { int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { if (test_bit(i, toclear)) mce_wrmsrl(msr_ops.status(i), 0); } @@ -1122,10 +1139,11 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, unsigned long *toclear, unsigned long *valid_banks, int no_way_out, int *worst) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); struct mca_config *cfg = &mca_cfg; int severity, i; - for (i = 0; i < cfg->banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { __clear_bit(i, toclear); if (!test_bit(i, valid_banks)) continue; @@ -1330,7 +1348,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) local_irq_enable(); if (kill_it || do_memory_failure(&m)) - force_sig(SIGBUS, current); + force_sig(SIGBUS); local_irq_disable(); ist_end_non_atomic(); } else { @@ -1463,27 +1481,29 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); -static int __mcheck_cpu_mce_banks_init(void) +static void __mcheck_cpu_mce_banks_init(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + u8 n_banks = this_cpu_read(mce_num_banks); int i; - mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); - if (!mce_banks) - return -ENOMEM; - - for (i = 0; i < MAX_NR_BANKS; i++) { + for (i = 0; i < n_banks; i++) { struct mce_bank *b = &mce_banks[i]; + /* + * Init them all, __mcheck_cpu_apply_quirks() is going to apply + * the required vendor quirks before + * __mcheck_cpu_init_clear_banks() does the final bank setup. 
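
The recurring change in this file is the move from one global, kcalloc()'d mce_banks array sized by mca_cfg.banks to a per-CPU array plus a per-CPU mce_num_banks, accessed through this_cpu_ptr()/per_cpu(). A rough userspace analogue of that data-structure shape follows (all names and sizes invented); it also shows why a single global count may not be enough once CPUs can report different bank counts.

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_NR_BANKS 64
    #define NR_CPUS      4

    struct bank {
        uint64_t ctl;   /* subevents to enable  */
        int init;       /* initialise the bank? */
    };

    /* One bank array and one bank count per CPU, instead of a shared global. */
    static struct bank banks[NR_CPUS][MAX_NR_BANKS];
    static unsigned int num_banks[NR_CPUS];

    static void init_banks(unsigned int cpu, unsigned int n)
    {
        num_banks[cpu] = n;
        for (unsigned int i = 0; i < n; i++) {
            banks[cpu][i].ctl = ~0ULL;   /* enable everything by default */
            banks[cpu][i].init = 1;
        }
    }

    int main(void)
    {
        init_banks(0, 10);   /* one CPU reports 10 banks in MCG_CAP  */
        init_banks(1, 23);   /* another may report a different count */
        printf("cpu0: %u banks, cpu1: %u banks\n", num_banks[0], num_banks[1]);
        return 0;
    }
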
+ */ b->ctl = -1ULL; b->init = 1; } - return 0; } /* * Initialize Machine Checks for a CPU. */ -static int __mcheck_cpu_cap_init(void) +static void __mcheck_cpu_cap_init(void) { u64 cap; u8 b; @@ -1491,16 +1511,16 @@ static int __mcheck_cpu_cap_init(void) rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (WARN_ON_ONCE(b > MAX_NR_BANKS)) + + if (b > MAX_NR_BANKS) { + pr_warn("CPU%d: Using only %u machine check banks out of %u\n", + smp_processor_id(), MAX_NR_BANKS, b); b = MAX_NR_BANKS; + } - mca_cfg.banks = max(mca_cfg.banks, b); + this_cpu_write(mce_num_banks, b); - if (!mce_banks) { - int err = __mcheck_cpu_mce_banks_init(); - if (err) - return err; - } + __mcheck_cpu_mce_banks_init(); /* Use accurate RIP reporting if available. */ if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) @@ -1508,8 +1528,6 @@ static int __mcheck_cpu_cap_init(void) if (cap & MCG_SER_P) mca_cfg.ser = 1; - - return 0; } static void __mcheck_cpu_init_generic(void) @@ -1536,9 +1554,10 @@ static void __mcheck_cpu_init_generic(void) static void __mcheck_cpu_init_clear_banks(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; if (!b->init) @@ -1549,6 +1568,33 @@ static void __mcheck_cpu_init_clear_banks(void) } /* + * Do a final check to see if there are any unused/RAZ banks. + * + * This must be done after the banks have been initialized and any quirks have + * been applied. + * + * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs. + * Otherwise, a user who disables a bank will not be able to re-enable it + * without a system reboot. + */ +static void __mcheck_cpu_check_banks(void) +{ + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); + u64 msrval; + int i; + + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { + struct mce_bank *b = &mce_banks[i]; + + if (!b->init) + continue; + + rdmsrl(msr_ops.ctl(i), msrval); + b->init = !!msrval; + } +} + +/* * During IFU recovery Sandy Bridge -EP4S processors set the RIPV and * EIPV bits in MCG_STATUS to zero on the affected logical processor (SDM * Vol 3B Table 15-20). But this confuses both the code that determines @@ -1579,6 +1625,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs) /* Add per CPU specific workarounds here */ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); struct mca_config *cfg = &mca_cfg; if (c->x86_vendor == X86_VENDOR_UNKNOWN) { @@ -1588,7 +1635,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { - if (c->x86 == 15 && cfg->banks > 4) { + if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) { /* * disable GART TBL walk error reporting, which * trips off incorrectly with the IOMMU & 3ware @@ -1607,7 +1654,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * Various K7s with broken bank 0 around. Always disable * by default. */ - if (c->x86 == 6 && cfg->banks > 0) + if (c->x86 == 6 && this_cpu_read(mce_num_banks) > 0) mce_banks[0].ctl = 0; /* @@ -1629,7 +1676,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) * valid event later, merely don't write CTL0. 
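For reference, the bank count that now lands in the per-CPU mce_num_banks variable is the low byte of IA32_MCG_CAP, clamped (with a warning) rather than rejected. A userspace-style sketch of that derivation; the mask and limit values are assumptions for illustration, not copied from this patch:

#include <stdio.h>

#define MCG_BANKCNT_MASK        0xff    /* assumed: bank count in the low byte */
#define MAX_NR_BANKS            32      /* assumed compile-time limit */

static unsigned int banks_from_mcg_cap(unsigned long long cap)
{
        unsigned int b = cap & MCG_BANKCNT_MASK;

        if (b > MAX_NR_BANKS) {
                printf("using only %u of %u reported banks\n", MAX_NR_BANKS, b);
                b = MAX_NR_BANKS;
        }
        return b;
}

int main(void)
{
        /* hypothetical IA32_MCG_CAP value reporting 10 banks */
        printf("%u banks\n", banks_from_mcg_cap(0x0c0aULL));
        return 0;
}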
*/ - if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0) + if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) mce_banks[0].init = 0; /* @@ -1815,7 +1862,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) if (!mce_available(c)) return; - if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { + __mcheck_cpu_cap_init(); + + if (__mcheck_cpu_apply_quirks(c) < 0) { mca_cfg.disabled = 1; return; } @@ -1832,6 +1881,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_generic(); __mcheck_cpu_init_vendor(c); __mcheck_cpu_init_clear_banks(); + __mcheck_cpu_check_banks(); __mcheck_cpu_setup_timer(); } @@ -1863,7 +1913,7 @@ static void __mce_disable_bank(void *arg) void mce_disable_bank(int bank) { - if (bank >= mca_cfg.banks) { + if (bank >= this_cpu_read(mce_num_banks)) { pr_warn(FW_BUG "Ignoring request to disable invalid MCA bank %d.\n", bank); @@ -1949,9 +1999,10 @@ int __init mcheck_init(void) */ static void mce_disable_error_reporting(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); int i; - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) @@ -2051,26 +2102,47 @@ static struct bus_type mce_subsys = { DEFINE_PER_CPU(struct device *, mce_device); -static inline struct mce_bank *attr_to_bank(struct device_attribute *attr) +static inline struct mce_bank_dev *attr_to_bank(struct device_attribute *attr) { - return container_of(attr, struct mce_bank, attr); + return container_of(attr, struct mce_bank_dev, attr); } static ssize_t show_bank(struct device *s, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); + u8 bank = attr_to_bank(attr)->bank; + struct mce_bank *b; + + if (bank >= per_cpu(mce_num_banks, s->id)) + return -EINVAL; + + b = &per_cpu(mce_banks_array, s->id)[bank]; + + if (!b->init) + return -ENODEV; + + return sprintf(buf, "%llx\n", b->ctl); } static ssize_t set_bank(struct device *s, struct device_attribute *attr, const char *buf, size_t size) { + u8 bank = attr_to_bank(attr)->bank; + struct mce_bank *b; u64 new; if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; - attr_to_bank(attr)->ctl = new; + if (bank >= per_cpu(mce_num_banks, s->id)) + return -EINVAL; + + b = &per_cpu(mce_banks_array, s->id)[bank]; + + if (!b->init) + return -ENODEV; + + b->ctl = new; mce_restart(); return size; @@ -2185,7 +2257,7 @@ static void mce_device_release(struct device *dev) kfree(dev); } -/* Per cpu device init. All of the cpus still share the same ctrl bank: */ +/* Per CPU device init. 
All of the CPUs still share the same bank device: */ static int mce_device_create(unsigned int cpu) { struct device *dev; @@ -2217,8 +2289,8 @@ static int mce_device_create(unsigned int cpu) if (err) goto error; } - for (j = 0; j < mca_cfg.banks; j++) { - err = device_create_file(dev, &mce_banks[j].attr); + for (j = 0; j < per_cpu(mce_num_banks, cpu); j++) { + err = device_create_file(dev, &mce_bank_devs[j].attr); if (err) goto error2; } @@ -2228,7 +2300,7 @@ static int mce_device_create(unsigned int cpu) return 0; error2: while (--j >= 0) - device_remove_file(dev, &mce_banks[j].attr); + device_remove_file(dev, &mce_bank_devs[j].attr); error: while (--i >= 0) device_remove_file(dev, mce_device_attrs[i]); @@ -2249,8 +2321,8 @@ static void mce_device_remove(unsigned int cpu) for (i = 0; mce_device_attrs[i]; i++) device_remove_file(dev, mce_device_attrs[i]); - for (i = 0; i < mca_cfg.banks; i++) - device_remove_file(dev, &mce_banks[i].attr); + for (i = 0; i < per_cpu(mce_num_banks, cpu); i++) + device_remove_file(dev, &mce_bank_devs[i].attr); device_unregister(dev); cpumask_clear_cpu(cpu, mce_device_initialized); @@ -2271,6 +2343,7 @@ static void mce_disable_cpu(void) static void mce_reenable_cpu(void) { + struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array); int i; if (!mce_available(raw_cpu_ptr(&cpu_info))) @@ -2278,7 +2351,7 @@ static void mce_reenable_cpu(void) if (!cpuhp_tasks_frozen) cmci_reenable(); - for (i = 0; i < mca_cfg.banks; i++) { + for (i = 0; i < this_cpu_read(mce_num_banks); i++) { struct mce_bank *b = &mce_banks[i]; if (b->init) @@ -2328,10 +2401,12 @@ static __init void mce_init_banks(void) { int i; - for (i = 0; i < mca_cfg.banks; i++) { - struct mce_bank *b = &mce_banks[i]; + for (i = 0; i < MAX_NR_BANKS; i++) { + struct mce_bank_dev *b = &mce_bank_devs[i]; struct device_attribute *a = &b->attr; + b->bank = i; + sysfs_attr_init(&a->attr); a->attr.name = b->attrname; snprintf(b->attrname, ATTR_LEN, "bank%d", i); @@ -2441,22 +2516,16 @@ static int fake_panic_set(void *data, u64 val) DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, "%llu\n"); -static int __init mcheck_debugfs_init(void) +static void __init mcheck_debugfs_init(void) { - struct dentry *dmce, *ffake_panic; + struct dentry *dmce; dmce = mce_get_debugfs_dir(); - if (!dmce) - return -ENOMEM; - ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce, - NULL, &fake_panic_fops); - if (!ffake_panic) - return -ENOMEM; - - return 0; + debugfs_create_file_unsafe("fake_panic", 0444, dmce, NULL, + &fake_panic_fops); } #else -static int __init mcheck_debugfs_init(void) { return -EINVAL; } +static void __init mcheck_debugfs_init(void) { } #endif DEFINE_STATIC_KEY_FALSE(mcsafe_key); @@ -2464,8 +2533,6 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { - pr_info("Using %d MCE banks\n", mca_cfg.banks); - if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 5d108f70f315..1f30117b24ba 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -645,7 +645,6 @@ static const struct file_operations readme_fops = { static struct dfs_node { char *name; - struct dentry *d; const struct file_operations *fops; umode_t perm; } dfs_fls[] = { @@ -659,49 +658,23 @@ static struct dfs_node { { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH }, }; -static int __init debugfs_init(void) +static void __init debugfs_init(void) { unsigned 
int i; dfs_inj = debugfs_create_dir("mce-inject", NULL); - if (!dfs_inj) - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) { - dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name, - dfs_fls[i].perm, - dfs_inj, - &i_mce, - dfs_fls[i].fops); - - if (!dfs_fls[i].d) - goto err_dfs_add; - } - - return 0; - -err_dfs_add: - while (i-- > 0) - debugfs_remove(dfs_fls[i].d); - debugfs_remove(dfs_inj); - dfs_inj = NULL; - - return -ENODEV; + for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) + debugfs_create_file(dfs_fls[i].name, dfs_fls[i].perm, dfs_inj, + &i_mce, dfs_fls[i].fops); } static int __init inject_init(void) { - int err; - if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) return -ENOMEM; - err = debugfs_init(); - if (err) { - free_cpumask_var(mce_inject_cpumask); - return err; - } + debugfs_init(); register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify"); mce_register_injector_chain(&inject_nb); diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index a34b55baa7aa..43031db429d2 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -22,17 +22,8 @@ enum severity_level { extern struct blocking_notifier_head x86_mce_decoder_chain; -#define ATTR_LEN 16 #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ -/* One object for each MCE bank, shared by all CPUs */ -struct mce_bank { - u64 ctl; /* subevents to enable */ - unsigned char init; /* initialise bank? */ - struct device_attribute attr; /* device attribute */ - char attrname[ATTR_LEN]; /* attribute name */ -}; - struct mce_evt_llist { struct llist_node llnode; struct mce mce; @@ -47,7 +38,6 @@ struct llist_node *mce_gen_pool_prepare_records(void); extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); struct dentry *mce_get_debugfs_dir(void); -extern struct mce_bank *mce_banks; extern mce_banks_t mce_banks_ce_disabled; #ifdef CONFIG_X86_MCE_INTEL @@ -128,7 +118,6 @@ struct mca_config { bios_cmci_threshold : 1, __reserved : 59; - u8 banks; s8 bootlog; int tolerant; int monarch_timeout; @@ -137,6 +126,7 @@ struct mca_config { }; extern struct mca_config mca_cfg; +DECLARE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks); struct mce_vendor_flags { /* diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c index 2d33a26d257e..210f1f5db5f7 100644 --- a/arch/x86/kernel/cpu/mce/severity.c +++ b/arch/x86/kernel/cpu/mce/severity.c @@ -400,21 +400,13 @@ static const struct file_operations severities_coverage_fops = { static int __init severities_debugfs_init(void) { - struct dentry *dmce, *fsev; + struct dentry *dmce; dmce = mce_get_debugfs_dir(); - if (!dmce) - goto err_out; - - fsev = debugfs_create_file("severities-coverage", 0444, dmce, NULL, - &severities_coverage_fops); - if (!fsev) - goto err_out; + debugfs_create_file("severities-coverage", 0444, dmce, NULL, + &severities_coverage_fops); return 0; - -err_out: - return -ENOMEM; } late_initcall(severities_debugfs_init); #endif /* CONFIG_DEBUG_FS */ diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 4ddadf672ab5..a0e52bd00ecc 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -59,7 +59,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE]; /* * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/microcode.txt + * format. 
See Documentation/x86/microcode.rst */ static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index a813987b5552..cb0fdcaf1415 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -789,13 +789,16 @@ static struct syscore_ops mc_syscore_ops = { .resume = mc_bp_resume, }; -static int mc_cpu_online(unsigned int cpu) +static int mc_cpu_starting(unsigned int cpu) { - struct device *dev; - - dev = get_cpu_device(cpu); microcode_update_cpu(cpu); pr_debug("CPU%d added\n", cpu); + return 0; +} + +static int mc_cpu_online(unsigned int cpu) +{ + struct device *dev = get_cpu_device(cpu); if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); @@ -872,7 +875,9 @@ int __init microcode_init(void) goto out_ucode_group; register_syscore_ops(&mc_syscore_ops); - cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:online", + cpuhp_setup_state_nocalls(CPUHP_AP_MICROCODE_LOADER, "x86/microcode:starting", + mc_cpu_starting, NULL); + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index d0dfb892c72f..aed45b8895d5 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -4,6 +4,8 @@ # Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h # +set -e + IN=$1 OUT=$2 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 7df29f08871b..062f77279ce3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -17,6 +17,7 @@ #include <linux/irq.h> #include <linux/kexec.h> #include <linux/i8253.h> +#include <linux/random.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -80,6 +81,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) inc_irq_stat(hyperv_stimer0_count); if (hv_stimer0_handler) hv_stimer0_handler(); + add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0); ack_APIC_irq(); exiting_irq(); @@ -89,7 +91,7 @@ __visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) { *vector = HYPERV_STIMER0_VECTOR; - *irq = 0; /* Unused on x86/x64 */ + *irq = -1; /* Unused on x86/x64 */ hv_stimer0_handler = handler; return 0; } @@ -266,9 +268,9 @@ static void __init ms_hyperv_init_platform(void) rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); - lapic_timer_frequency = hv_lapic_frequency; + lapic_timer_period = hv_lapic_frequency; pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n", - lapic_timer_frequency); + lapic_timer_period); } register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 9356c1c9024d..aa5c064a6a22 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -743,7 +743,15 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); - wbinvd(); + + /* + * Cache flushing is the most time-consuming step when programming + * the MTRRs. 
Fortunately, as per the Intel Software Development + * Manual, we can skip it if the processor supports cache self- + * snooping. + */ + if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) + wbinvd(); /* Save value of CR4 and clear Page Global Enable (bit 7) */ if (boot_cpu_has(X86_FEATURE_PGE)) { @@ -760,7 +768,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock) /* Disable MTRRs, and set the default type to uncached */ mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); - wbinvd(); + + /* Again, only flush caches if we have to. */ + if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) + wbinvd(); } static void post_set(void) __releases(set_atomicity_lock) diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index 604c0e3bcc83..d7623e1b927d 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -431,11 +431,7 @@ static int pseudo_lock_fn(void *_rdtgrp) #else register unsigned int line_size asm("esi"); register unsigned int size asm("edi"); -#ifdef CONFIG_X86_64 - register void *mem_r asm("rbx"); -#else - register void *mem_r asm("ebx"); -#endif /* CONFIG_X86_64 */ + register void *mem_r asm(_ASM_BX); #endif /* CONFIG_KASAN */ /* @@ -1503,7 +1499,7 @@ static int pseudo_lock_dev_mmap(struct file *filp, struct vm_area_struct *vma) * may be scheduled elsewhere and invalidate entries in the * pseudo-locked region. */ - if (!cpumask_subset(¤t->cpus_allowed, &plr->d->cpu_mask)) { + if (!cpumask_subset(current->cpus_ptr, &plr->d->cpu_mask)) { mutex_unlock(&rdtgroup_mutex); return -EINVAL; } diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 2131b8bbaad7..bf3034994754 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -796,8 +796,12 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { struct rdt_resource *r = of->kn->parent->priv; - u32 sw_shareable = 0, hw_shareable = 0; - u32 exclusive = 0, pseudo_locked = 0; + /* + * Use unsigned long even though only 32 bits are used to ensure + * test_bit() is used safely. + */ + unsigned long sw_shareable = 0, hw_shareable = 0; + unsigned long exclusive = 0, pseudo_locked = 0; struct rdt_domain *dom; int i, hwb, swb, excl, psl; enum rdtgrp_mode mode; @@ -842,10 +846,10 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of, } for (i = r->cache.cbm_len - 1; i >= 0; i--) { pseudo_locked = dom->plr ? dom->plr->cbm : 0; - hwb = test_bit(i, (unsigned long *)&hw_shareable); - swb = test_bit(i, (unsigned long *)&sw_shareable); - excl = test_bit(i, (unsigned long *)&exclusive); - psl = test_bit(i, (unsigned long *)&pseudo_locked); + hwb = test_bit(i, &hw_shareable); + swb = test_bit(i, &sw_shareable); + excl = test_bit(i, &exclusive); + psl = test_bit(i, &pseudo_locked); if (hwb && swb) seq_putc(seq, 'X'); else if (hwb && !swb) @@ -2484,28 +2488,21 @@ out_destroy: * modification to the CBM if the default does not satisfy the * requirements. */ -static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r) +static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r) { - /* - * Convert the u32 _val to an unsigned long required by all the bit - * operations within this function. 
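The mtrr/generic.c hunk above skips the costly WBINVD when the processor supports cache self-snooping. Outside the kernel, the same capability can be probed from CPUID leaf 1; a small sketch using the compiler's cpuid helper, assuming the self-snoop ("SS") flag sits in EDX bit 27 of that leaf:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;

        /* assumed: CPUID.1:EDX[27] is the "SS" (self snoop) feature flag */
        if (edx & (1u << 27))
                printf("self-snoop supported: cache flush can be skipped\n");
        else
                printf("no self-snoop: WBINVD still needed around MTRR updates\n");

        return 0;
}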
No more than 32 bits of this - * converted value can be accessed because all bit operations are - * additionally provided with cbm_len that is initialized during - * hardware enumeration using five bits from the EAX register and - * thus never can exceed 32 bits. - */ - unsigned long *val = (unsigned long *)_val; unsigned int cbm_len = r->cache.cbm_len; unsigned long first_bit, zero_bit; + unsigned long val = _val; - if (*val == 0) - return; + if (!val) + return 0; - first_bit = find_first_bit(val, cbm_len); - zero_bit = find_next_zero_bit(val, cbm_len, first_bit); + first_bit = find_first_bit(&val, cbm_len); + zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); /* Clear any remaining bits to ensure contiguous region */ - bitmap_clear(val, zero_bit, cbm_len - zero_bit); + bitmap_clear(&val, zero_bit, cbm_len - zero_bit); + return (u32)val; } /* @@ -2563,7 +2560,7 @@ static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r, * Force the initial CBM to be valid, user can * modify the CBM based on system availability. */ - cbm_ensure_valid(&d->new_ctrl, r); + d->new_ctrl = cbm_ensure_valid(d->new_ctrl, r); /* * Assign the u32 CBM to an unsigned long to ensure that * bitmap_weight() does not access out-of-bound memory. diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 94aa1c72ca98..adf9b71386ef 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,10 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_LOCAL, CPUID_EDX, 2, 0x0000000f, 1 }, { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, { X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 }, { X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 }, diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 8f6c784141d1..ee48c3fc8a65 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -15,33 +15,66 @@ /* leaf 0xb SMT level */ #define SMT_LEVEL 0 -/* leaf 0xb sub-leaf types */ +/* extended topology sub-leaf types */ #define INVALID_TYPE 0 #define SMT_TYPE 1 #define CORE_TYPE 2 +#define DIE_TYPE 5 #define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -int detect_extended_topology_early(struct cpuinfo_x86 *c) -{ #ifdef CONFIG_SMP +unsigned int __max_die_per_package __read_mostly = 1; +EXPORT_SYMBOL(__max_die_per_package); + +/* + * Check if given CPUID extended toplogy "leaf" is implemented + */ +static int check_extended_topology_leaf(int leaf) +{ unsigned int eax, ebx, ecx, edx; - if (c->cpuid_level < 0xb) + cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + + if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) return -1; - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + return 0; +} +/* + * Return best CPUID Extended Toplogy Leaf supported + */ +static int detect_extended_topology_leaf(struct cpuinfo_x86 *c) +{ + if (c->cpuid_level >= 0x1f) { + if (check_extended_topology_leaf(0x1f) == 0) + return 0x1f; + } - /* - * check if the cpuid leaf 0xb is actually implemented. 
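The rewritten cbm_ensure_valid() in the rdtgroup.c hunk above trims a capacity bitmask so that only the lowest contiguous run of set bits survives. A standalone sketch of the same idea, using plain loops in place of the kernel bitmap helpers:

#include <stdio.h>
#include <stdint.h>

/* Keep only the lowest contiguous run of set bits within cbm_len bits. */
static uint32_t cbm_make_contiguous(uint32_t val, unsigned int cbm_len)
{
        unsigned int first_bit, zero_bit, i;

        if (!val)
                return 0;

        for (first_bit = 0; first_bit < cbm_len; first_bit++)
                if (val & (1u << first_bit))
                        break;

        for (zero_bit = first_bit; zero_bit < cbm_len; zero_bit++)
                if (!(val & (1u << zero_bit)))
                        break;

        /* clear everything at and above the first gap */
        for (i = zero_bit; i < cbm_len; i++)
                val &= ~(1u << i);

        return val;
}

int main(void)
{
        /* 0b10110110 -> lowest run is bits 1-2 -> prints 0x6 */
        printf("0x%x\n", cbm_make_contiguous(0xb6, 8));
        return 0;
}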
- */ - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) + if (c->cpuid_level >= 0xb) { + if (check_extended_topology_leaf(0xb) == 0) + return 0xb; + } + + return -1; +} +#endif + +int detect_extended_topology_early(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int eax, ebx, ecx, edx; + int leaf; + + leaf = detect_extended_topology_leaf(c); + if (leaf < 0) return -1; set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); + cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); /* * initial apic id, which also represents 32-bit extended x2apic id. */ @@ -52,7 +85,7 @@ int detect_extended_topology_early(struct cpuinfo_x86 *c) } /* - * Check for extended topology enumeration cpuid leaf 0xb and if it + * Check for extended topology enumeration cpuid leaf, and if it * exists, use it for populating initial_apicid and cpu topology * detection. */ @@ -60,22 +93,28 @@ int detect_extended_topology(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int ht_mask_width, core_plus_mask_width; + unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; unsigned int core_select_mask, core_level_siblings; + unsigned int die_select_mask, die_level_siblings; + int leaf; - if (detect_extended_topology_early(c) < 0) + leaf = detect_extended_topology_leaf(c); + if (leaf < 0) return -1; /* * Populate HT related information from sub-leaf level 0. */ - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); + c->initial_apicid = edx; core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); sub_index = 1; do { - cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); + cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx); /* * Check for the Core type in the implemented sub leaves. @@ -83,23 +122,34 @@ int detect_extended_topology(struct cpuinfo_x86 *c) if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - break; + die_level_siblings = core_level_siblings; + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); + } + if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) { + die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); + die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); } sub_index++; } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; - - c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) - & core_select_mask; - c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); + die_select_mask = (~(-1 << die_plus_mask_width)) >> + core_plus_mask_width; + + c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, + ht_mask_width) & core_select_mask; + c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid, + core_plus_mask_width) & die_select_mask; + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, + die_plus_mask_width); /* * Reinit the apicid, now that we have extended initial_apicid. 
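The topology.c changes above derive core, die and package IDs from the x2APIC ID using the per-level shift widths reported by the extended topology leaf. A worked sketch of that arithmetic, assuming phys_pkg_id() reduces to a plain right shift of the APIC ID (as with the default APIC driver) and using made-up widths of 1 SMT bit, 4 core-plus-SMT bits and 5 die-plus-core bits:

#include <stdio.h>

int main(void)
{
        /* hypothetical widths from the SMT/CORE/DIE sub-leaves */
        unsigned int ht_mask_width = 1;
        unsigned int core_plus_mask_width = 4;
        unsigned int die_plus_mask_width = 5;

        /* the patch writes these as (~(-1 << width)) >> lower_width */
        unsigned int core_select_mask = (~(~0u << core_plus_mask_width)) >> ht_mask_width;
        unsigned int die_select_mask  = (~(~0u << die_plus_mask_width)) >> core_plus_mask_width;

        unsigned int apicid = 0x1b;     /* example initial APIC ID */

        unsigned int cpu_core_id  = (apicid >> ht_mask_width) & core_select_mask;
        unsigned int cpu_die_id   = (apicid >> core_plus_mask_width) & die_select_mask;
        unsigned int phys_proc_id =  apicid >> die_plus_mask_width;

        printf("core %u, die %u, package %u\n",
               cpu_core_id, cpu_die_id, phys_proc_id);
        return 0;
}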
*/ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); c->x86_max_cores = (core_level_siblings / smp_num_siblings); + __max_die_per_package = (die_level_siblings / core_level_siblings); #endif return 0; } diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c new file mode 100644 index 000000000000..6a204e7336c1 --- /dev/null +++ b/arch/x86/kernel/cpu/umwait.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/syscore_ops.h> +#include <linux/suspend.h> +#include <linux/cpu.h> + +#include <asm/msr.h> + +#define UMWAIT_C02_ENABLE 0 + +#define UMWAIT_CTRL_VAL(max_time, c02_disable) \ + (((max_time) & MSR_IA32_UMWAIT_CONTROL_TIME_MASK) | \ + ((c02_disable) & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE)) + +/* + * Cache IA32_UMWAIT_CONTROL MSR. This is a systemwide control. By default, + * umwait max time is 100000 in TSC-quanta and C0.2 is enabled + */ +static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); + +/* + * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in + * the sysfs write functions. + */ +static DEFINE_MUTEX(umwait_lock); + +static void umwait_update_control_msr(void * unused) +{ + lockdep_assert_irqs_disabled(); + wrmsr(MSR_IA32_UMWAIT_CONTROL, READ_ONCE(umwait_control_cached), 0); +} + +/* + * The CPU hotplug callback sets the control MSR to the global control + * value. + * + * Disable interrupts so the read of umwait_control_cached and the WRMSR + * are protected against a concurrent sysfs write. Otherwise the sysfs + * write could update the cached value after it had been read on this CPU + * and issue the IPI before the old value had been written. The IPI would + * interrupt, write the new value and after return from IPI the previous + * value would be written by this CPU. + * + * With interrupts disabled the upcoming CPU either sees the new control + * value or the IPI is updating this CPU to the new control value after + * interrupts have been reenabled. + */ +static int umwait_cpu_online(unsigned int cpu) +{ + local_irq_disable(); + umwait_update_control_msr(NULL); + local_irq_enable(); + return 0; +} + +/* + * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which + * is the only active CPU at this time. The MSR is set up on the APs via the + * CPU hotplug callback. + * + * This function is invoked on resume from suspend and hibernation. On + * resume from suspend the restore should be not required, but we neither + * trust the firmware nor does it matter if the same value is written + * again. + */ +static void umwait_syscore_resume(void) +{ + umwait_update_control_msr(NULL); +} + +static struct syscore_ops umwait_syscore_ops = { + .resume = umwait_syscore_resume, +}; + +/* sysfs interface */ + +/* + * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled. + * Otherwise, C0.2 is enabled. 
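The new umwait.c above caches IA32_UMWAIT_CONTROL as a single 32-bit value: the maximum wait time occupies the upper bits (its two low bits must be zero) and bit 0 disables C0.2 when set. A small sketch of composing and decoding that value; the two masks are assumed to be ~0x3 and 0x1, mirroring the description in the patch rather than quoting the real macro values:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define UMWAIT_CTRL_TIME_MASK    (~0x03u)  /* assumed: max time, bits [1:0] zero */
#define UMWAIT_CTRL_C02_DISABLE  0x01u     /* assumed: bit 0 disables C0.2 */

static uint32_t umwait_ctrl_val(uint32_t max_time, bool c02_enable)
{
        uint32_t ctrl = max_time & UMWAIT_CTRL_TIME_MASK;

        if (!c02_enable)
                ctrl |= UMWAIT_CTRL_C02_DISABLE;
        return ctrl;
}

int main(void)
{
        /* the default described above: 100000 TSC quanta, C0.2 enabled */
        uint32_t ctrl = umwait_ctrl_val(100000, true);

        printf("ctrl=0x%x max_time=%u c02_enabled=%d\n",
               ctrl, ctrl & UMWAIT_CTRL_TIME_MASK,
               !(ctrl & UMWAIT_CTRL_C02_DISABLE));
        return 0;
}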
+ */ +static inline bool umwait_ctrl_c02_enabled(u32 ctrl) +{ + return !(ctrl & MSR_IA32_UMWAIT_CONTROL_C02_DISABLE); +} + +static inline u32 umwait_ctrl_max_time(u32 ctrl) +{ + return ctrl & MSR_IA32_UMWAIT_CONTROL_TIME_MASK; +} + +static inline void umwait_update_control(u32 maxtime, bool c02_enable) +{ + u32 ctrl = maxtime & MSR_IA32_UMWAIT_CONTROL_TIME_MASK; + + if (!c02_enable) + ctrl |= MSR_IA32_UMWAIT_CONTROL_C02_DISABLE; + + WRITE_ONCE(umwait_control_cached, ctrl); + /* Propagate to all CPUs */ + on_each_cpu(umwait_update_control_msr, NULL, 1); +} + +static ssize_t +enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + u32 ctrl = READ_ONCE(umwait_control_cached); + + return sprintf(buf, "%d\n", umwait_ctrl_c02_enabled(ctrl)); +} + +static ssize_t enable_c02_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + bool c02_enable; + u32 ctrl; + int ret; + + ret = kstrtobool(buf, &c02_enable); + if (ret) + return ret; + + mutex_lock(&umwait_lock); + + ctrl = READ_ONCE(umwait_control_cached); + if (c02_enable != umwait_ctrl_c02_enabled(ctrl)) + umwait_update_control(ctrl, c02_enable); + + mutex_unlock(&umwait_lock); + + return count; +} +static DEVICE_ATTR_RW(enable_c02); + +static ssize_t +max_time_show(struct device *kobj, struct device_attribute *attr, char *buf) +{ + u32 ctrl = READ_ONCE(umwait_control_cached); + + return sprintf(buf, "%u\n", umwait_ctrl_max_time(ctrl)); +} + +static ssize_t max_time_store(struct device *kobj, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 max_time, ctrl; + int ret; + + ret = kstrtou32(buf, 0, &max_time); + if (ret) + return ret; + + /* bits[1:0] must be zero */ + if (max_time & ~MSR_IA32_UMWAIT_CONTROL_TIME_MASK) + return -EINVAL; + + mutex_lock(&umwait_lock); + + ctrl = READ_ONCE(umwait_control_cached); + if (max_time != umwait_ctrl_max_time(ctrl)) + umwait_update_control(max_time, umwait_ctrl_c02_enabled(ctrl)); + + mutex_unlock(&umwait_lock); + + return count; +} +static DEVICE_ATTR_RW(max_time); + +static struct attribute *umwait_attrs[] = { + &dev_attr_enable_c02.attr, + &dev_attr_max_time.attr, + NULL +}; + +static struct attribute_group umwait_attr_group = { + .attrs = umwait_attrs, + .name = "umwait_control", +}; + +static int __init umwait_init(void) +{ + struct device *dev; + int ret; + + if (!boot_cpu_has(X86_FEATURE_WAITPKG)) + return -ENODEV; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online", + umwait_cpu_online, NULL); + + register_syscore_ops(&umwait_syscore_ops); + + /* + * Add umwait control interface. Ignore failure, so at least the + * default values are set up in case the machine manages to boot. + */ + dev = cpu_subsys.dev_root; + return sysfs_create_group(&dev->kobj, &umwait_attr_group); +} +device_initcall(umwait_init); diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 0eda91f8eeac..3c648476d4fb 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void) #ifdef CONFIG_X86_LOCAL_APIC /* Skip lapic calibration since we know the bus frequency. 
*/ - lapic_timer_frequency = ecx / HZ; + lapic_timer_period = ecx / HZ; pr_info("Host bus clock speed read from hypervisor : %u Hz\n", ecx); #endif diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c new file mode 100644 index 000000000000..8e6f2f4b4afe --- /dev/null +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/sched.h> +#include <linux/sched/clock.h> + +#include <asm/cpufeature.h> + +#include "cpu.h" + +#define MSR_ZHAOXIN_FCR57 0x00001257 + +#define ACE_PRESENT (1 << 6) +#define ACE_ENABLED (1 << 7) +#define ACE_FCR (1 << 7) /* MSR_ZHAOXIN_FCR */ + +#define RNG_PRESENT (1 << 2) +#define RNG_ENABLED (1 << 3) +#define RNG_ENABLE (1 << 8) /* MSR_ZHAOXIN_RNG */ + +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + +static void init_zhaoxin_cap(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + + /* Test for Extended Feature Flags presence */ + if (cpuid_eax(0xC0000000) >= 0xC0000001) { + u32 tmp = cpuid_edx(0xC0000001); + + /* Enable ACE unit, if present and disabled */ + if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) { + rdmsr(MSR_ZHAOXIN_FCR57, lo, hi); + /* Enable ACE unit */ + lo |= ACE_FCR; + wrmsr(MSR_ZHAOXIN_FCR57, lo, hi); + pr_info("CPU: Enabled ACE h/w crypto\n"); + } + + /* Enable RNG unit, if present and disabled */ + if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) { + rdmsr(MSR_ZHAOXIN_FCR57, lo, hi); + /* Enable RNG unit */ + lo |= RNG_ENABLE; + wrmsr(MSR_ZHAOXIN_FCR57, lo, hi); + pr_info("CPU: Enabled h/w RNG\n"); + } + + /* + * Store Extended Feature Flags as word 5 of the CPU + * capability bit array + */ + c->x86_capability[CPUID_C000_0001_EDX] = cpuid_edx(0xC0000001); + } + + if (c->x86 >= 0x6) + set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + cpu_detect_cache_sizes(c); +} + +static void early_init_zhaoxin(struct cpuinfo_x86 *c) +{ + if (c->x86 >= 0x6) + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_SYSENTER32); +#endif + if (c->x86_power & (1 << 8)) { + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); + } + + if (c->cpuid_level >= 0x00000001) { + u32 eax, ebx, ecx, edx; + + cpuid(0x00000001, &eax, &ebx, &ecx, &edx); + /* + * If HTT (EDX[28]) is set EBX[16:23] contain the number of + * apicids which are reserved per package. Store the resulting + * shift value for the package management code. 
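As the comment just above notes, the new zhaoxin.c turns the number of APIC IDs reserved per package (CPUID.1 EBX[23:16] when HTT is set) into a shift width for the package management code. The computation it relies on, get_count_order(), is a ceiling log2; a plain-C sketch with a hypothetical EBX value:

#include <stdio.h>

/* ceil(log2(n)) for n > 0, mirroring the kernel's get_count_order() */
static int count_order(unsigned int n)
{
        int order = 0;

        while ((1u << order) < n)
                order++;
        return order;
}

int main(void)
{
        unsigned int ebx = 0x00100800;             /* hypothetical CPUID.1 EBX */
        unsigned int ids_per_pkg = (ebx >> 16) & 0xff;

        printf("%u apicids per package -> %d coreid bits\n",
               ids_per_pkg, count_order(ids_per_pkg));
        return 0;
}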
+ */ + if (edx & (1U << 28)) + c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); + } + +} + +static void zhaoxin_detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + +static void init_zhaoxin(struct cpuinfo_x86 *c) +{ + early_init_zhaoxin(c); + init_intel_cacheinfo(c); + detect_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + + if (c->cpuid_level > 9) { + unsigned int eax = cpuid_eax(10); + + /* + * Check for version and the number of counters + * Version(eax[7:0]) can't be 0; + * Counters(eax[15:8]) should be greater than 1; + */ + if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + + if (c->x86 >= 0x6) + init_zhaoxin_cap(c); +#ifdef CONFIG_X86_64 + set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +#endif + + if (cpu_has(c, X86_FEATURE_VMX)) + zhaoxin_detect_vmx_virtcap(c); +} + +#ifdef CONFIG_X86_32 +static unsigned int +zhaoxin_size_cache(struct cpuinfo_x86 *c, unsigned int size) +{ + return size; +} +#endif + +static const struct cpu_dev zhaoxin_cpu_dev = { + .c_vendor = "zhaoxin", + .c_ident = { " Shanghai " }, + .c_early_init = early_init_zhaoxin, + .c_init = init_zhaoxin, +#ifdef CONFIG_X86_32 + .legacy_cache_size = zhaoxin_size_cache, +#endif + .c_x86_vendor = X86_VENDOR_ZHAOXIN, +}; + +cpu_dev_register(zhaoxin_cpu_dev); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 576b2e1bfc12..2bf70a2fed90 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -56,7 +56,6 @@ struct crash_memmap_data { */ crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL; EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss); -unsigned long crash_zero_bytes; static inline void cpu_crash_vmclear_loaded_vmcss(void) { @@ -73,14 +72,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void) static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { -#ifdef CONFIG_X86_32 - struct pt_regs fixed_regs; - - if (!user_mode(regs)) { - crash_fixup_ss_esp(&fixed_regs, regs); - regs = &fixed_regs; - } -#endif crash_save_cpu(regs, cpu); /* @@ -181,6 +172,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE + +static unsigned long crash_zero_bytes; + static int get_nr_ram_ranges_callback(struct resource *res, void *arg) { unsigned int *nr_ranges = arg; @@ -381,6 +375,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd, memmap_entry_callback); + /* Add e820 reserved ranges */ + cmd.type = E820_TYPE_RESERVED; + flags = IORESOURCE_MEM; + walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd, + memmap_entry_callback); + /* Add crashk_low_res region */ if 
(crashk_low_res.end) { ei.addr = crashk_low_res.start; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 8f32e705a980..e69408bf664b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1063,10 +1063,10 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry) case E820_TYPE_NVS: return IORES_DESC_ACPI_NV_STORAGE; case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY; case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; + case E820_TYPE_RESERVED: return IORES_DESC_RESERVED; case E820_TYPE_RESERVED_KERN: /* Fall-through: */ case E820_TYPE_RAM: /* Fall-through: */ case E820_TYPE_UNUSABLE: /* Fall-through: */ - case E820_TYPE_RESERVED: /* Fall-through: */ default: return IORES_DESC_NONE; } } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 649fbc3fcf9f..12c70840980e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -43,18 +43,6 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); */ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -static void kernel_fpu_disable(void) -{ - WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, true); -} - -static void kernel_fpu_enable(void) -{ - WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, false); -} - static bool kernel_fpu_disabled(void) { return this_cpu_read(in_kernel_fpu); @@ -94,42 +82,33 @@ bool irq_fpu_usable(void) } EXPORT_SYMBOL(irq_fpu_usable); -static void __kernel_fpu_begin(void) +void kernel_fpu_begin(void) { - struct fpu *fpu = ¤t->thread.fpu; + preempt_disable(); WARN_ON_FPU(!irq_fpu_usable()); + WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); - kernel_fpu_disable(); + this_cpu_write(in_kernel_fpu, true); - if (!(current->flags & PF_KTHREAD)) { - if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { - set_thread_flag(TIF_NEED_FPU_LOAD); - /* - * Ignore return value -- we don't care if reg state - * is clobbered. - */ - copy_fpregs_to_fpstate(fpu); - } + if (!(current->flags & PF_KTHREAD) && + !test_thread_flag(TIF_NEED_FPU_LOAD)) { + set_thread_flag(TIF_NEED_FPU_LOAD); + /* + * Ignore return value -- we don't care if reg state + * is clobbered. + */ + copy_fpregs_to_fpstate(¤t->thread.fpu); } __cpu_invalidate_fpregs_state(); } - -static void __kernel_fpu_end(void) -{ - kernel_fpu_enable(); -} - -void kernel_fpu_begin(void) -{ - preempt_disable(); - __kernel_fpu_begin(); -} EXPORT_SYMBOL_GPL(kernel_fpu_begin); void kernel_fpu_end(void) { - __kernel_fpu_end(); + WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); + + this_cpu_write(in_kernel_fpu, false); preempt_enable(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); @@ -155,7 +134,6 @@ void fpu__save(struct fpu *fpu) trace_x86_fpu_after_save(fpu); fpregs_unlock(); } -EXPORT_SYMBOL_GPL(fpu__save); /* * Legacy x87 fpstate state init: diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index ef0030e3fe6b..6ce7e0a23268 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -204,12 +204,6 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ if (!boot_cpu_has(X86_FEATURE_FPU)) { - /* - * Disable xsave as we do not support it if i387 - * emulation is enabled. 
- */ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); fpu_kernel_xstate_size = sizeof(struct swregs_state); } else { if (boot_cpu_has(X86_FEATURE_FXSR)) @@ -252,17 +246,20 @@ static void __init fpu__init_parse_early_param(void) char *argptr = arg; int bit; +#ifdef CONFIG_X86_32 if (cmdline_find_option_bool(boot_command_line, "no387")) +#ifdef CONFIG_MATH_EMULATION setup_clear_cpu_cap(X86_FEATURE_FPU); +#else + pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n"); +#endif - if (cmdline_find_option_bool(boot_command_line, "nofxsr")) { + if (cmdline_find_option_bool(boot_command_line, "nofxsr")) setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); - setup_clear_cpu_cap(X86_FEATURE_XMM); - } +#endif if (cmdline_find_option_bool(boot_command_line, "noxsave")) - fpu__xstate_clear_all_cpu_caps(); + setup_clear_cpu_cap(X86_FEATURE_XSAVE); if (cmdline_find_option_bool(boot_command_line, "noxsaveopt")) setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3c36dd1784db..e5cb67d67c03 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -8,6 +8,8 @@ #include <linux/cpu.h> #include <linux/mman.h> #include <linux/pkeys.h> +#include <linux/seq_file.h> +#include <linux/proc_fs.h> #include <asm/fpu/api.h> #include <asm/fpu/internal.h> @@ -68,15 +70,6 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; unsigned int fpu_user_xstate_size; /* - * Clear all of the X86_FEATURE_* bits that are unavailable - * when the CPU has no XSAVE support. - */ -void fpu__xstate_clear_all_cpu_caps(void) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); -} - -/* * Return whether the system supports a given xfeature. * * Also return the name of the (most advanced) feature that the caller requested: @@ -709,7 +702,7 @@ static void fpu__init_disable_system_xstate(void) { xfeatures_mask = 0; cr4_clear_bits(X86_CR4_OSXSAVE); - fpu__xstate_clear_all_cpu_caps(); + setup_clear_cpu_cap(X86_FEATURE_XSAVE); } /* @@ -1240,3 +1233,48 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) return 0; } + +#ifdef CONFIG_PROC_PID_ARCH_STATUS +/* + * Report the amount of time elapsed in millisecond since last AVX512 + * use in the task. + */ +static void avx512_status(struct seq_file *m, struct task_struct *task) +{ + unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp); + long delta; + + if (!timestamp) { + /* + * Report -1 if no AVX512 usage + */ + delta = -1; + } else { + delta = (long)(jiffies - timestamp); + /* + * Cap to LONG_MAX if time difference > LONG_MAX + */ + if (delta < 0) + delta = LONG_MAX; + delta = jiffies_to_msecs(delta); + } + + seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta); + seq_putc(m, '\n'); +} + +/* + * Report architecture specific information + */ +int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + /* + * Report AVX512 state if the processor and build option supported. 
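The avx512_status() helper added above reports, via /proc/<pid>/arch_status, how many milliseconds have passed since the task last used AVX-512, with -1 meaning it never did and a clamp against jiffies wraparound. A standalone sketch of the same bookkeeping; the tick rate is an assumption standing in for jiffies_to_msecs():

#include <stdio.h>
#include <limits.h>

#define HZ 250  /* assumed tick rate; the kernel converts via jiffies_to_msecs() */

static long avx512_elapsed_ms(unsigned long jiffies_now, unsigned long timestamp)
{
        long delta;

        if (!timestamp)
                return -1;                      /* AVX-512 never used */

        delta = (long)(jiffies_now - timestamp);
        if (delta < 0)                          /* wrapped: cap the report */
                return LONG_MAX;

        return delta * (1000 / HZ);             /* jiffies -> milliseconds */
}

int main(void)
{
        printf("%ld ms\n", avx512_elapsed_ms(100250, 100000)); /* 250 ticks ago */
        printf("%ld ms\n", avx512_elapsed_ms(100250, 0));      /* never used */
        return 0;
}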
+ */ + if (cpu_feature_enabled(X86_FEATURE_AVX512F)) + avx512_status(m, task); + + return 0; +} +#endif /* CONFIG_PROC_PID_ARCH_STATUS */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0927bb158ffc..4b73f5937f41 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -22,6 +22,7 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/module.h> +#include <linux/memory.h> #include <trace/syscall.h> @@ -34,16 +35,25 @@ #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) + __acquires(&text_mutex) { + /* + * Need to grab text_mutex to prevent a race from module loading + * and live kernel patching from changing the text permissions while + * ftrace has it set to "read/write". + */ + mutex_lock(&text_mutex); set_kernel_text_rw(); set_all_modules_text_rw(); return 0; } int ftrace_arch_code_modify_post_process(void) + __releases(&text_mutex) { set_all_modules_text_ro(); set_kernel_text_ro(); + mutex_unlock(&text_mutex); return 0; } @@ -300,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs) ip = regs->ip - INT3_INSN_SIZE; -#ifdef CONFIG_X86_64 if (ftrace_location(ip)) { int3_emulate_call(regs, (unsigned long)ftrace_regs_caller); return 1; @@ -312,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs) int3_emulate_call(regs, ftrace_update_func_call); return 1; } -#else - if (ftrace_location(ip) || is_ftrace_caller(ip)) { - int3_emulate_jmp(regs, ip + CALL_INSN_SIZE); - return 1; - } -#endif return 0; } diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index 2ba914a34b06..073aab525d80 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -9,6 +9,8 @@ #include <asm/export.h> #include <asm/ftrace.h> #include <asm/nospec-branch.h> +#include <asm/frame.h> +#include <asm/asm-offsets.h> # define function_hook __fentry__ EXPORT_SYMBOL(__fentry__) @@ -89,26 +91,38 @@ END(ftrace_caller) ENTRY(ftrace_regs_caller) /* - * i386 does not save SS and ESP when coming from kernel. - * Instead, to get sp, ®s->sp is used (see ptrace.h). - * Unfortunately, that means eflags must be at the same location - * as the current return ip is. We move the return ip into the - * regs->ip location, and move flags into the return ip location. 
+ * We're here from an mcount/fentry CALL, and the stack frame looks like: + * + * <previous context> + * RET-IP + * + * The purpose of this function is to call out in an emulated INT3 + * environment with a stack frame like: + * + * <previous context> + * gap / RET-IP + * gap + * gap + * gap + * pt_regs + * + * We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds */ - pushl $__KERNEL_CS - pushl 4(%esp) /* Save the return ip */ - pushl $0 /* Load 0 into orig_ax */ + subl $3*4, %esp # RET-IP + 3 gaps + pushl %ss # ss + pushl %esp # points at ss + addl $5*4, (%esp) # make it point at <previous context> + pushfl # flags + pushl $__KERNEL_CS # cs + pushl 7*4(%esp) # ip <- RET-IP + pushl $0 # orig_eax + pushl %gs pushl %fs pushl %es pushl %ds - pushl %eax - - /* Get flags and place them into the return ip slot */ - pushf - popl %eax - movl %eax, 8*4(%esp) + pushl %eax pushl %ebp pushl %edi pushl %esi @@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller) pushl %ecx pushl %ebx - movl 12*4(%esp), %eax /* Load ip (1st parameter) */ - subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ - movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */ - movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ - pushl %esp /* Save pt_regs as 4th parameter */ + ENCODE_FRAME_POINTER + + movl PT_EIP(%esp), %eax # 1st argument: IP + subl $MCOUNT_INSN_SIZE, %eax + movl 21*4(%esp), %edx # 2nd argument: parent ip + movl function_trace_op, %ecx # 3rd argument: ftrace_pos + pushl %esp # 4th argument: pt_regs GLOBAL(ftrace_regs_call) call ftrace_stub - addl $4, %esp /* Skip pt_regs */ + addl $4, %esp # skip 4th argument - /* restore flags */ - push 14*4(%esp) - popf + /* place IP below the new SP */ + movl PT_OLDESP(%esp), %eax + movl PT_EIP(%esp), %ecx + movl %ecx, -4(%eax) - /* Move return ip back to its original location */ - movl 12*4(%esp), %eax - movl %eax, 14*4(%esp) + /* place EAX below that */ + movl PT_EAX(%esp), %ecx + movl %ecx, -8(%eax) popl %ebx popl %ecx @@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call) popl %esi popl %edi popl %ebp - popl %eax - popl %ds - popl %es - popl %fs - popl %gs - /* use lea to not affect flags */ - lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */ + lea -8(%eax), %esp + popl %eax jmp .Lftrace_ret diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 10eb2760ef2c..809d54397dba 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -9,6 +9,7 @@ #include <asm/export.h> #include <asm/nospec-branch.h> #include <asm/unwind_hints.h> +#include <asm/frame.h> .code64 .section .entry.text, "ax" @@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr) leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx movq %rcx, RSP(%rsp) + ENCODE_FRAME_POINTER + /* regs go into 4th parameter */ leaq (%rsp), %rcx diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 16b1cbd3a61e..29ffa495bd1c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -184,24 +184,25 @@ unsigned long __head __startup_64(unsigned long physaddr, pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); if (la57) { - p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); + p4d = fixup_pointer(early_dynamic_pgts[(*next_pgt_ptr)++], + physaddr); i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)p4d + pgtable_flags; pgd[i + 1] = (pgdval_t)p4d + pgtable_flags; - i = (physaddr >> P4D_SHIFT) % PTRS_PER_P4D; - p4d[i + 0] = (pgdval_t)pud + pgtable_flags; - p4d[i + 1] = (pgdval_t)pud + pgtable_flags; + i = physaddr >> P4D_SHIFT; + p4d[(i + 0) % 
PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; + p4d[(i + 1) % PTRS_PER_P4D] = (pgdval_t)pud + pgtable_flags; } else { i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; pgd[i + 0] = (pgdval_t)pud + pgtable_flags; pgd[i + 1] = (pgdval_t)pud + pgtable_flags; } - i = (physaddr >> PUD_SHIFT) % PTRS_PER_PUD; - pud[i + 0] = (pudval_t)pmd + pgtable_flags; - pud[i + 1] = (pudval_t)pmd + pgtable_flags; + i = physaddr >> PUD_SHIFT; + pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; + pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags; pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; /* Filter out unsupported __PAGE_KERNEL_* bits: */ @@ -211,8 +212,9 @@ unsigned long __head __startup_64(unsigned long physaddr, pmd_entry += physaddr; for (i = 0; i < DIV_ROUND_UP(_end - _text, PMD_SIZE); i++) { - int idx = i + (physaddr >> PMD_SHIFT) % PTRS_PER_PMD; - pmd[idx] = pmd_entry + i * PMD_SIZE; + int idx = i + (physaddr >> PMD_SHIFT); + + pmd[idx % PTRS_PER_PMD] = pmd_entry + i * PMD_SIZE; } /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index a0573f2e7763..c43e96a938d0 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1,32 +1,44 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/clocksource.h> #include <linux/clockchips.h> #include <linux/interrupt.h> -#include <linux/irq.h> #include <linux/export.h> #include <linux/delay.h> -#include <linux/errno.h> -#include <linux/i8253.h> -#include <linux/slab.h> #include <linux/hpet.h> -#include <linux/init.h> #include <linux/cpu.h> -#include <linux/pm.h> -#include <linux/io.h> +#include <linux/irq.h> -#include <asm/cpufeature.h> -#include <asm/irqdomain.h> -#include <asm/fixmap.h> #include <asm/hpet.h> #include <asm/time.h> -#define HPET_MASK CLOCKSOURCE_MASK(32) +#undef pr_fmt +#define pr_fmt(fmt) "hpet: " fmt -#define HPET_DEV_USED_BIT 2 -#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) -#define HPET_DEV_VALID 0x8 -#define HPET_DEV_FSB_CAP 0x1000 -#define HPET_DEV_PERI_CAP 0x2000 +enum hpet_mode { + HPET_MODE_UNUSED, + HPET_MODE_LEGACY, + HPET_MODE_CLOCKEVT, + HPET_MODE_DEVICE, +}; + +struct hpet_channel { + struct clock_event_device evt; + unsigned int num; + unsigned int cpu; + unsigned int irq; + unsigned int in_use; + enum hpet_mode mode; + unsigned int boot_cfg; + char name[10]; +}; + +struct hpet_base { + unsigned int nr_channels; + unsigned int nr_clockevents; + unsigned int boot_cfg; + struct hpet_channel *channels; +}; + +#define HPET_MASK CLOCKSOURCE_MASK(32) #define HPET_MIN_CYCLES 128 #define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) @@ -39,22 +51,25 @@ u8 hpet_blockid; /* OS timer block num */ bool hpet_msi_disable; #ifdef CONFIG_PCI_MSI -static unsigned int hpet_num_timers; +static DEFINE_PER_CPU(struct hpet_channel *, cpu_hpet_channel); +static struct irq_domain *hpet_domain; #endif + static void __iomem *hpet_virt_address; -struct hpet_dev { - struct clock_event_device evt; - unsigned int num; - int cpu; - unsigned int irq; - unsigned int flags; - char name[10]; -}; +static struct hpet_base hpet_base; + +static bool hpet_legacy_int_enabled; +static unsigned long hpet_freq; -static inline struct hpet_dev *EVT_TO_HPET_DEV(struct clock_event_device *evtdev) +bool boot_hpet_disable; +bool hpet_force_user; +static bool hpet_verbose; + +static inline +struct hpet_channel *clockevent_to_channel(struct clock_event_device *evt) { - return container_of(evtdev, struct hpet_dev, evt); + return container_of(evt, struct hpet_channel, evt); } inline unsigned int 
hpet_readl(unsigned int a) @@ -67,10 +82,6 @@ static inline void hpet_writel(unsigned int d, unsigned int a) writel(d, hpet_virt_address + a); } -#ifdef CONFIG_X86_64 -#include <asm/pgtable.h> -#endif - static inline void hpet_set_mapping(void) { hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); @@ -85,10 +96,6 @@ static inline void hpet_clear_mapping(void) /* * HPET command line enable / disable */ -bool boot_hpet_disable; -bool hpet_force_user; -static bool hpet_verbose; - static int __init hpet_setup(char *str) { while (str) { @@ -120,13 +127,8 @@ static inline int is_hpet_capable(void) return !boot_hpet_disable && hpet_address; } -/* - * HPET timer interrupt enable / disable - */ -static bool hpet_legacy_int_enabled; - /** - * is_hpet_enabled - check whether the hpet timer interrupt is enabled + * is_hpet_enabled - Check whether the legacy HPET timer interrupt is enabled */ int is_hpet_enabled(void) { @@ -136,32 +138,36 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled); static void _hpet_print_config(const char *function, int line) { - u32 i, timers, l, h; - printk(KERN_INFO "hpet: %s(%d):\n", function, line); - l = hpet_readl(HPET_ID); - h = hpet_readl(HPET_PERIOD); - timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h); - l = hpet_readl(HPET_CFG); - h = hpet_readl(HPET_STATUS); - printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h); + u32 i, id, period, cfg, status, channels, l, h; + + pr_info("%s(%d):\n", function, line); + + id = hpet_readl(HPET_ID); + period = hpet_readl(HPET_PERIOD); + pr_info("ID: 0x%x, PERIOD: 0x%x\n", id, period); + + cfg = hpet_readl(HPET_CFG); + status = hpet_readl(HPET_STATUS); + pr_info("CFG: 0x%x, STATUS: 0x%x\n", cfg, status); + l = hpet_readl(HPET_COUNTER); h = hpet_readl(HPET_COUNTER+4); - printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); + pr_info("COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); + + channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - for (i = 0; i < timers; i++) { + for (i = 0; i < channels; i++) { l = hpet_readl(HPET_Tn_CFG(i)); h = hpet_readl(HPET_Tn_CFG(i)+4); - printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", - i, l, h); + pr_info("T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", i, l, h); + l = hpet_readl(HPET_Tn_CMP(i)); h = hpet_readl(HPET_Tn_CMP(i)+4); - printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", - i, l, h); + pr_info("T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", i, l, h); + l = hpet_readl(HPET_Tn_ROUTE(i)); h = hpet_readl(HPET_Tn_ROUTE(i)+4); - printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", - i, l, h); + pr_info("T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", i, l, h); } } @@ -172,31 +178,20 @@ do { \ } while (0) /* - * When the hpet driver (/dev/hpet) is enabled, we need to reserve + * When the HPET driver (/dev/hpet) is enabled, we need to reserve * timer 0 and timer 1 in case of RTC emulation. 
*/ #ifdef CONFIG_HPET -static void hpet_reserve_msi_timers(struct hpet_data *hd); - -static void hpet_reserve_platform_timers(unsigned int id) +static void __init hpet_reserve_platform_timers(void) { - struct hpet __iomem *hpet = hpet_virt_address; - struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; - unsigned int nrtimers, i; struct hpet_data hd; - - nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; + unsigned int i; memset(&hd, 0, sizeof(hd)); hd.hd_phys_address = hpet_address; - hd.hd_address = hpet; - hd.hd_nirqs = nrtimers; - hpet_reserve_timer(&hd, 0); - -#ifdef CONFIG_HPET_EMULATE_RTC - hpet_reserve_timer(&hd, 1); -#endif + hd.hd_address = hpet_virt_address; + hd.hd_nirqs = hpet_base.nr_channels; /* * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254 @@ -206,30 +201,52 @@ static void hpet_reserve_platform_timers(unsigned int id) hd.hd_irq[0] = HPET_LEGACY_8254; hd.hd_irq[1] = HPET_LEGACY_RTC; - for (i = 2; i < nrtimers; timer++, i++) { - hd.hd_irq[i] = (readl(&timer->hpet_config) & - Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; - } + for (i = 0; i < hpet_base.nr_channels; i++) { + struct hpet_channel *hc = hpet_base.channels + i; + + if (i >= 2) + hd.hd_irq[i] = hc->irq; - hpet_reserve_msi_timers(&hd); + switch (hc->mode) { + case HPET_MODE_UNUSED: + case HPET_MODE_DEVICE: + hc->mode = HPET_MODE_DEVICE; + break; + case HPET_MODE_CLOCKEVT: + case HPET_MODE_LEGACY: + hpet_reserve_timer(&hd, hc->num); + break; + } + } hpet_alloc(&hd); +} +static void __init hpet_select_device_channel(void) +{ + int i; + + for (i = 0; i < hpet_base.nr_channels; i++) { + struct hpet_channel *hc = hpet_base.channels + i; + + /* Associate the first unused channel to /dev/hpet */ + if (hc->mode == HPET_MODE_UNUSED) { + hc->mode = HPET_MODE_DEVICE; + return; + } + } } + #else -static void hpet_reserve_platform_timers(unsigned int id) { } +static inline void hpet_reserve_platform_timers(void) { } +static inline void hpet_select_device_channel(void) {} #endif -/* - * Common hpet info - */ -static unsigned long hpet_freq; - -static struct clock_event_device hpet_clockevent; - +/* Common HPET functions */ static void hpet_stop_counter(void) { u32 cfg = hpet_readl(HPET_CFG); + cfg &= ~HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } @@ -243,6 +260,7 @@ static void hpet_reset_counter(void) static void hpet_start_counter(void) { unsigned int cfg = hpet_readl(HPET_CFG); + cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); } @@ -274,24 +292,9 @@ static void hpet_enable_legacy_int(void) hpet_legacy_int_enabled = true; } -static void hpet_legacy_clockevent_register(void) -{ - /* Start HPET legacy interrupts */ - hpet_enable_legacy_int(); - - /* - * Start hpet with the boot cpu mask and make it - * global after the IO_APIC has been initialized. 
- */ - hpet_clockevent.cpumask = cpumask_of(boot_cpu_data.cpu_index); - clockevents_config_and_register(&hpet_clockevent, hpet_freq, - HPET_MIN_PROG_DELTA, 0x7FFFFFFF); - global_clock_event = &hpet_clockevent; - printk(KERN_DEBUG "hpet clockevent registered\n"); -} - -static int hpet_set_periodic(struct clock_event_device *evt, int timer) +static int hpet_clkevt_set_state_periodic(struct clock_event_device *evt) { + unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg, cmp, now; uint64_t delta; @@ -300,11 +303,11 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer) delta >>= evt->shift; now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned int)delta; - cfg = hpet_readl(HPET_Tn_CFG(timer)); + cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_Tn_CFG(timer)); - hpet_writel(cmp, HPET_Tn_CMP(timer)); + hpet_writel(cfg, HPET_Tn_CFG(channel)); + hpet_writel(cmp, HPET_Tn_CMP(channel)); udelay(1); /* * HPET on AMD 81xx needs a second write (with HPET_TN_SETVAL @@ -313,52 +316,55 @@ static int hpet_set_periodic(struct clock_event_device *evt, int timer) * (See AMD-8111 HyperTransport I/O Hub Data Sheet, * Publication # 24674) */ - hpet_writel((unsigned int)delta, HPET_Tn_CMP(timer)); + hpet_writel((unsigned int)delta, HPET_Tn_CMP(channel)); hpet_start_counter(); hpet_print_config(); return 0; } -static int hpet_set_oneshot(struct clock_event_device *evt, int timer) +static int hpet_clkevt_set_state_oneshot(struct clock_event_device *evt) { + unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg; - cfg = hpet_readl(HPET_Tn_CFG(timer)); + cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_Tn_CFG(timer)); + hpet_writel(cfg, HPET_Tn_CFG(channel)); return 0; } -static int hpet_shutdown(struct clock_event_device *evt, int timer) +static int hpet_clkevt_set_state_shutdown(struct clock_event_device *evt) { + unsigned int channel = clockevent_to_channel(evt)->num; unsigned int cfg; - cfg = hpet_readl(HPET_Tn_CFG(timer)); + cfg = hpet_readl(HPET_Tn_CFG(channel)); cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_Tn_CFG(timer)); + hpet_writel(cfg, HPET_Tn_CFG(channel)); return 0; } -static int hpet_resume(struct clock_event_device *evt) +static int hpet_clkevt_legacy_resume(struct clock_event_device *evt) { hpet_enable_legacy_int(); hpet_print_config(); return 0; } -static int hpet_next_event(unsigned long delta, - struct clock_event_device *evt, int timer) +static int +hpet_clkevt_set_next_event(unsigned long delta, struct clock_event_device *evt) { + unsigned int channel = clockevent_to_channel(evt)->num; u32 cnt; s32 res; cnt = hpet_readl(HPET_COUNTER); cnt += (u32) delta; - hpet_writel(cnt, HPET_Tn_CMP(timer)); + hpet_writel(cnt, HPET_Tn_CMP(channel)); /* * HPETs are a complete disaster. The compare register is @@ -387,360 +393,250 @@ static int hpet_next_event(unsigned long delta, return res < HPET_MIN_CYCLES ? 
-ETIME : 0; } -static int hpet_legacy_shutdown(struct clock_event_device *evt) +static void hpet_init_clockevent(struct hpet_channel *hc, unsigned int rating) { - return hpet_shutdown(evt, 0); -} + struct clock_event_device *evt = &hc->evt; -static int hpet_legacy_set_oneshot(struct clock_event_device *evt) -{ - return hpet_set_oneshot(evt, 0); -} + evt->rating = rating; + evt->irq = hc->irq; + evt->name = hc->name; + evt->cpumask = cpumask_of(hc->cpu); + evt->set_state_oneshot = hpet_clkevt_set_state_oneshot; + evt->set_next_event = hpet_clkevt_set_next_event; + evt->set_state_shutdown = hpet_clkevt_set_state_shutdown; -static int hpet_legacy_set_periodic(struct clock_event_device *evt) -{ - return hpet_set_periodic(evt, 0); + evt->features = CLOCK_EVT_FEAT_ONESHOT; + if (hc->boot_cfg & HPET_TN_PERIODIC) { + evt->features |= CLOCK_EVT_FEAT_PERIODIC; + evt->set_state_periodic = hpet_clkevt_set_state_periodic; + } } -static int hpet_legacy_resume(struct clock_event_device *evt) +static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc) { - return hpet_resume(evt); -} + /* + * Start HPET with the boot CPU's cpumask and make it global after + * the IO_APIC has been initialized. + */ + hc->cpu = boot_cpu_data.cpu_index; + strncpy(hc->name, "hpet", sizeof(hc->name)); + hpet_init_clockevent(hc, 50); -static int hpet_legacy_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - return hpet_next_event(delta, evt, 0); -} + hc->evt.tick_resume = hpet_clkevt_legacy_resume; -/* - * The hpet clock event device - */ -static struct clock_event_device hpet_clockevent = { - .name = "hpet", - .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT, - .set_state_periodic = hpet_legacy_set_periodic, - .set_state_oneshot = hpet_legacy_set_oneshot, - .set_state_shutdown = hpet_legacy_shutdown, - .tick_resume = hpet_legacy_resume, - .set_next_event = hpet_legacy_next_event, - .irq = 0, - .rating = 50, -}; + /* + * Legacy horrors and sins from the past. HPET used periodic mode + * unconditionally forever on the legacy channel 0. Removing the + * below hack and using the conditional in hpet_init_clockevent() + * makes at least Qemu and one hardware machine fail to boot. + * There are two issues which cause the boot failure: + * + * #1 After the timer delivery test in IOAPIC and the IOAPIC setup + * the next interrupt is not delivered despite the HPET channel + * being programmed correctly. Reprogramming the HPET after + * switching to IOAPIC makes it work again. After fixing this, + * the next issue surfaces: + * + * #2 Due to the unconditional periodic mode availability the Local + * APIC timer calibration can hijack the global clockevents + * event handler without causing damage. Using oneshot at this + * stage makes if hang because the HPET does not get + * reprogrammed due to the handler hijacking. Duh, stupid me! + * + * Both issues require major surgery and especially the kick HPET + * again after enabling IOAPIC results in really nasty hackery. + * This 'assume periodic works' magic has survived since HPET + * support got added, so it's questionable whether this should be + * fixed. Both Qemu and the failing hardware machine support + * periodic mode despite the fact that both don't advertise it in + * the configuration register and both need that extra kick after + * switching to IOAPIC. Seems to be a feature... 
+ */ + hc->evt.features |= CLOCK_EVT_FEAT_PERIODIC; + hc->evt.set_state_periodic = hpet_clkevt_set_state_periodic; + + /* Start HPET legacy interrupts */ + hpet_enable_legacy_int(); + + clockevents_config_and_register(&hc->evt, hpet_freq, + HPET_MIN_PROG_DELTA, 0x7FFFFFFF); + global_clock_event = &hc->evt; + pr_debug("Clockevent registered\n"); +} /* * HPET MSI Support */ #ifdef CONFIG_PCI_MSI -static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); -static struct hpet_dev *hpet_devs; -static struct irq_domain *hpet_domain; - void hpet_msi_unmask(struct irq_data *data) { - struct hpet_dev *hdev = irq_data_get_irq_handler_data(data); + struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; - /* unmask it */ - cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg = hpet_readl(HPET_Tn_CFG(hc->num)); cfg |= HPET_TN_ENABLE | HPET_TN_FSB; - hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); + hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } void hpet_msi_mask(struct irq_data *data) { - struct hpet_dev *hdev = irq_data_get_irq_handler_data(data); + struct hpet_channel *hc = irq_data_get_irq_handler_data(data); unsigned int cfg; - /* mask it */ - cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg = hpet_readl(HPET_Tn_CFG(hc->num)); cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB); - hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); -} - -void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg) -{ - hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); - hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); + hpet_writel(cfg, HPET_Tn_CFG(hc->num)); } -void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg) +void hpet_msi_write(struct hpet_channel *hc, struct msi_msg *msg) { - msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); - msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); - msg->address_hi = 0; + hpet_writel(msg->data, HPET_Tn_ROUTE(hc->num)); + hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hc->num) + 4); } -static int hpet_msi_shutdown(struct clock_event_device *evt) +static int hpet_clkevt_msi_resume(struct clock_event_device *evt) { - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - return hpet_shutdown(evt, hdev->num); -} - -static int hpet_msi_set_oneshot(struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - return hpet_set_oneshot(evt, hdev->num); -} - -static int hpet_msi_set_periodic(struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - - return hpet_set_periodic(evt, hdev->num); -} - -static int hpet_msi_resume(struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - struct irq_data *data = irq_get_irq_data(hdev->irq); + struct hpet_channel *hc = clockevent_to_channel(evt); + struct irq_data *data = irq_get_irq_data(hc->irq); struct msi_msg msg; /* Restore the MSI msg and unmask the interrupt */ irq_chip_compose_msi_msg(data, &msg); - hpet_msi_write(hdev, &msg); + hpet_msi_write(hc, &msg); hpet_msi_unmask(data); return 0; } -static int hpet_msi_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); - return hpet_next_event(delta, evt, hdev->num); -} - -static irqreturn_t hpet_interrupt_handler(int irq, void *data) +static irqreturn_t hpet_msi_interrupt_handler(int irq, void *data) { - struct hpet_dev *dev = (struct hpet_dev *)data; - struct clock_event_device *hevt = &dev->evt; + struct hpet_channel *hc = data; + struct clock_event_device *evt = &hc->evt; - if (!hevt->event_handler) { - printk(KERN_INFO "Spurious HPET timer 
interrupt on HPET timer %d\n", - dev->num); + if (!evt->event_handler) { + pr_info("Spurious interrupt HPET channel %d\n", hc->num); return IRQ_HANDLED; } - hevt->event_handler(hevt); + evt->event_handler(evt); return IRQ_HANDLED; } -static int hpet_setup_irq(struct hpet_dev *dev) +static int hpet_setup_msi_irq(struct hpet_channel *hc) { - - if (request_irq(dev->irq, hpet_interrupt_handler, + if (request_irq(hc->irq, hpet_msi_interrupt_handler, IRQF_TIMER | IRQF_NOBALANCING, - dev->name, dev)) + hc->name, hc)) return -1; - disable_irq(dev->irq); - irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); - enable_irq(dev->irq); + disable_irq(hc->irq); + irq_set_affinity(hc->irq, cpumask_of(hc->cpu)); + enable_irq(hc->irq); - printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", - dev->name, dev->irq); + pr_debug("%s irq %u for MSI\n", hc->name, hc->irq); return 0; } -/* This should be called in specific @cpu */ -static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) +/* Invoked from the hotplug callback on @cpu */ +static void init_one_hpet_msi_clockevent(struct hpet_channel *hc, int cpu) { - struct clock_event_device *evt = &hdev->evt; - - WARN_ON(cpu != smp_processor_id()); - if (!(hdev->flags & HPET_DEV_VALID)) - return; - - hdev->cpu = cpu; - per_cpu(cpu_hpet_dev, cpu) = hdev; - evt->name = hdev->name; - hpet_setup_irq(hdev); - evt->irq = hdev->irq; + struct clock_event_device *evt = &hc->evt; - evt->rating = 110; - evt->features = CLOCK_EVT_FEAT_ONESHOT; - if (hdev->flags & HPET_DEV_PERI_CAP) { - evt->features |= CLOCK_EVT_FEAT_PERIODIC; - evt->set_state_periodic = hpet_msi_set_periodic; - } + hc->cpu = cpu; + per_cpu(cpu_hpet_channel, cpu) = hc; + hpet_setup_msi_irq(hc); - evt->set_state_shutdown = hpet_msi_shutdown; - evt->set_state_oneshot = hpet_msi_set_oneshot; - evt->tick_resume = hpet_msi_resume; - evt->set_next_event = hpet_msi_next_event; - evt->cpumask = cpumask_of(hdev->cpu); + hpet_init_clockevent(hc, 110); + evt->tick_resume = hpet_clkevt_msi_resume; clockevents_config_and_register(evt, hpet_freq, HPET_MIN_PROG_DELTA, 0x7FFFFFFF); } -#ifdef CONFIG_HPET -/* Reserve at least one timer for userspace (/dev/hpet) */ -#define RESERVE_TIMERS 1 -#else -#define RESERVE_TIMERS 0 -#endif - -static void hpet_msi_capability_lookup(unsigned int start_timer) +static struct hpet_channel *hpet_get_unused_clockevent(void) { - unsigned int id; - unsigned int num_timers; - unsigned int num_timers_used = 0; - int i, irq; - - if (hpet_msi_disable) - return; - - if (boot_cpu_has(X86_FEATURE_ARAT)) - return; - id = hpet_readl(HPET_ID); - - num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); - num_timers++; /* Value read out starts from 0 */ - hpet_print_config(); - - hpet_domain = hpet_create_irq_domain(hpet_blockid); - if (!hpet_domain) - return; - - hpet_devs = kcalloc(num_timers, sizeof(struct hpet_dev), GFP_KERNEL); - if (!hpet_devs) - return; - - hpet_num_timers = num_timers; - - for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { - struct hpet_dev *hdev = &hpet_devs[num_timers_used]; - unsigned int cfg = hpet_readl(HPET_Tn_CFG(i)); - - /* Only consider HPET timer with MSI support */ - if (!(cfg & HPET_TN_FSB_CAP)) - continue; + int i; - hdev->flags = 0; - if (cfg & HPET_TN_PERIODIC_CAP) - hdev->flags |= HPET_DEV_PERI_CAP; - sprintf(hdev->name, "hpet%d", i); - hdev->num = i; + for (i = 0; i < hpet_base.nr_channels; i++) { + struct hpet_channel *hc = hpet_base.channels + i; - irq = hpet_assign_irq(hpet_domain, hdev, hdev->num); - if (irq <= 0) + if (hc->mode != 
HPET_MODE_CLOCKEVT || hc->in_use) continue; - - hdev->irq = irq; - hdev->flags |= HPET_DEV_FSB_CAP; - hdev->flags |= HPET_DEV_VALID; - num_timers_used++; - if (num_timers_used == num_possible_cpus()) - break; + hc->in_use = 1; + return hc; } - - printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n", - num_timers, num_timers_used); + return NULL; } -#ifdef CONFIG_HPET -static void hpet_reserve_msi_timers(struct hpet_data *hd) +static int hpet_cpuhp_online(unsigned int cpu) { - int i; - - if (!hpet_devs) - return; + struct hpet_channel *hc = hpet_get_unused_clockevent(); - for (i = 0; i < hpet_num_timers; i++) { - struct hpet_dev *hdev = &hpet_devs[i]; + if (hc) + init_one_hpet_msi_clockevent(hc, cpu); + return 0; +} - if (!(hdev->flags & HPET_DEV_VALID)) - continue; +static int hpet_cpuhp_dead(unsigned int cpu) +{ + struct hpet_channel *hc = per_cpu(cpu_hpet_channel, cpu); - hd->hd_irq[hdev->num] = hdev->irq; - hpet_reserve_timer(hd, hdev->num); - } + if (!hc) + return 0; + free_irq(hc->irq, hc); + hc->in_use = 0; + per_cpu(cpu_hpet_channel, cpu) = NULL; + return 0; } -#endif -static struct hpet_dev *hpet_get_unused_timer(void) +static void __init hpet_select_clockevents(void) { - int i; + unsigned int i; - if (!hpet_devs) - return NULL; + hpet_base.nr_clockevents = 0; - for (i = 0; i < hpet_num_timers; i++) { - struct hpet_dev *hdev = &hpet_devs[i]; + /* No point if MSI is disabled or CPU has an Always Runing APIC Timer */ + if (hpet_msi_disable || boot_cpu_has(X86_FEATURE_ARAT)) + return; - if (!(hdev->flags & HPET_DEV_VALID)) - continue; - if (test_and_set_bit(HPET_DEV_USED_BIT, - (unsigned long *)&hdev->flags)) - continue; - return hdev; - } - return NULL; -} + hpet_print_config(); -struct hpet_work_struct { - struct delayed_work work; - struct completion complete; -}; + hpet_domain = hpet_create_irq_domain(hpet_blockid); + if (!hpet_domain) + return; -static void hpet_work(struct work_struct *w) -{ - struct hpet_dev *hdev; - int cpu = smp_processor_id(); - struct hpet_work_struct *hpet_work; + for (i = 0; i < hpet_base.nr_channels; i++) { + struct hpet_channel *hc = hpet_base.channels + i; + int irq; - hpet_work = container_of(w, struct hpet_work_struct, work.work); + if (hc->mode != HPET_MODE_UNUSED) + continue; - hdev = hpet_get_unused_timer(); - if (hdev) - init_one_hpet_msi_clockevent(hdev, cpu); + /* Only consider HPET channel with MSI support */ + if (!(hc->boot_cfg & HPET_TN_FSB_CAP)) + continue; - complete(&hpet_work->complete); -} + sprintf(hc->name, "hpet%d", i); -static int hpet_cpuhp_online(unsigned int cpu) -{ - struct hpet_work_struct work; - - INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work); - init_completion(&work.complete); - /* FIXME: add schedule_work_on() */ - schedule_delayed_work_on(cpu, &work.work, 0); - wait_for_completion(&work.complete); - destroy_delayed_work_on_stack(&work.work); - return 0; -} + irq = hpet_assign_irq(hpet_domain, hc, hc->num); + if (irq <= 0) + continue; -static int hpet_cpuhp_dead(unsigned int cpu) -{ - struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu); + hc->irq = irq; + hc->mode = HPET_MODE_CLOCKEVT; - if (!hdev) - return 0; - free_irq(hdev->irq, hdev); - hdev->flags &= ~HPET_DEV_USED; - per_cpu(cpu_hpet_dev, cpu) = NULL; - return 0; -} -#else + if (++hpet_base.nr_clockevents == num_possible_cpus()) + break; + } -static void hpet_msi_capability_lookup(unsigned int start_timer) -{ - return; + pr_info("%d channels of %d reserved for per-cpu timers\n", + hpet_base.nr_channels, hpet_base.nr_clockevents); } 
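
The hpet_cpuhp_online()/hpet_cpuhp_dead() callbacks above claim and release a per-CPU HPET MSI channel directly from the CPU hotplug state machine, replacing the old workqueue-based setup. A minimal sketch of how such callbacks are typically registered via cpuhp_setup_state() follows; the CPUHP_AP_X86_HPET_ONLINE registration mirrors the hpet_late_init() hunk later in this patch, while the teardown-state registration for hpet_cpuhp_dead(), its "x86/hpet:dead" name, and the wrapper function are illustrative assumptions, not part of the patch.

/*
 * Sketch only, not part of the patch: wiring the hotplug callbacks.
 * For an AP state, cpuhp_setup_state() runs the startup callback on
 * every CPU that is already online and on each CPU that comes online
 * later; the teardown callback runs when a CPU goes down.
 */
#include <linux/cpuhotplug.h>

static int __init example_hpet_hotplug_setup(void)	/* hypothetical wrapper */
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online",
				hpet_cpuhp_online, NULL);
	if (ret < 0)
		return ret;

	/*
	 * Assumed teardown-only registration so hpet_cpuhp_dead() runs on
	 * CPU removal; a dynamic state returns a positive state number on
	 * success, so only negative values indicate failure.
	 */
	ret = cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "x86/hpet:dead",
				NULL, hpet_cpuhp_dead);
	if (ret < 0) {
		cpuhp_remove_state(CPUHP_AP_X86_HPET_ONLINE);
		return ret;
	}

	return 0;
}
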
-#ifdef CONFIG_HPET -static void hpet_reserve_msi_timers(struct hpet_data *hd) -{ - return; -} -#endif +#else + +static inline void hpet_select_clockevents(void) { } #define hpet_cpuhp_online NULL #define hpet_cpuhp_dead NULL @@ -754,10 +650,10 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd) /* * Reading the HPET counter is a very slow operation. If a large number of * CPUs are trying to access the HPET counter simultaneously, it can cause - * massive delay and slow down system performance dramatically. This may + * massive delays and slow down system performance dramatically. This may * happen when HPET is the default clock source instead of TSC. For a * really large system with hundreds of CPUs, the slowdown may be so - * severe that it may actually crash the system because of a NMI watchdog + * severe, that it can actually crash the system because of a NMI watchdog * soft lockup, for example. * * If multiple CPUs are trying to access the HPET counter at the same time, @@ -766,10 +662,9 @@ static void hpet_reserve_msi_timers(struct hpet_data *hd) * * This special feature is only enabled on x86-64 systems. It is unlikely * that 32-bit x86 systems will have enough CPUs to require this feature - * with its associated locking overhead. And we also need 64-bit atomic - * read. + * with its associated locking overhead. We also need 64-bit atomic read. * - * The lock and the hpet value are stored together and can be read in a + * The lock and the HPET value are stored together and can be read in a * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t * is 32 bits in size. */ @@ -858,15 +753,40 @@ static struct clocksource clocksource_hpet = { .resume = hpet_resume_counter, }; -static int hpet_clocksource_register(void) +/* + * AMD SB700 based systems with spread spectrum enabled use a SMM based + * HPET emulation to provide proper frequency setting. + * + * On such systems the SMM code is initialized with the first HPET register + * access and takes some time to complete. During this time the config + * register reads 0xffffffff. We check for max 1000 loops whether the + * config register reads a non-0xffffffff value to make sure that the + * HPET is up and running before we proceed any further. + * + * A counting loop is safe, as the HPET access takes thousands of CPU cycles. + * + * On non-SB700 based machines this check is only done once and has no + * side effects. + */ +static bool __init hpet_cfg_working(void) { - u64 start, now; - u64 t1; + int i; + + for (i = 0; i < 1000; i++) { + if (hpet_readl(HPET_CFG) != 0xFFFFFFFF) + return true; + } + + pr_warn("Config register invalid. Disabling HPET\n"); + return false; +} + +static bool __init hpet_counting(void) +{ + u64 start, now, t1; - /* Start the counter */ hpet_restart_counter(); - /* Verify whether hpet counter works */ t1 = hpet_readl(HPET_COUNTER); start = rdtsc(); @@ -877,30 +797,24 @@ static int hpet_clocksource_register(void) * 1 GHz == 200us */ do { - rep_nop(); + if (t1 != hpet_readl(HPET_COUNTER)) + return true; now = rdtsc(); } while ((now - start) < 200000UL); - if (t1 == hpet_readl(HPET_COUNTER)) { - printk(KERN_WARNING - "HPET counter not counting. HPET disabled\n"); - return -ENODEV; - } - - clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); - return 0; + pr_warn("Counter not counting. HPET disabled\n"); + return false; } -static u32 *hpet_boot_cfg; - /** * hpet_enable - Try to setup the HPET timer. Returns 1 on success. 
*/ int __init hpet_enable(void) { - u32 hpet_period, cfg, id; + u32 hpet_period, cfg, id, irq; + unsigned int i, channels; + struct hpet_channel *hc; u64 freq; - unsigned int i, last; if (!is_hpet_capable()) return 0; @@ -909,40 +823,22 @@ int __init hpet_enable(void) if (!hpet_virt_address) return 0; + /* Validate that the config register is working */ + if (!hpet_cfg_working()) + goto out_nohpet; + + /* Validate that the counter is counting */ + if (!hpet_counting()) + goto out_nohpet; + /* * Read the period and check for a sane value: */ hpet_period = hpet_readl(HPET_PERIOD); - - /* - * AMD SB700 based systems with spread spectrum enabled use a - * SMM based HPET emulation to provide proper frequency - * setting. The SMM code is initialized with the first HPET - * register access and takes some time to complete. During - * this time the config register reads 0xffffffff. We check - * for max. 1000 loops whether the config register reads a non - * 0xffffffff value to make sure that HPET is up and running - * before we go further. A counting loop is safe, as the HPET - * access takes thousands of CPU cycles. On non SB700 based - * machines this check is only done once and has no side - * effects. - */ - for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { - if (i == 1000) { - printk(KERN_WARNING - "HPET config register value = 0xFFFFFFFF. " - "Disabling HPET\n"); - goto out_nohpet; - } - } - if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) goto out_nohpet; - /* - * The period is a femto seconds value. Convert it to a - * frequency. - */ + /* The period is a femtoseconds value. Convert it to a frequency. */ freq = FSEC_PER_SEC; do_div(freq, hpet_period); hpet_freq = freq; @@ -954,72 +850,90 @@ int __init hpet_enable(void) id = hpet_readl(HPET_ID); hpet_print_config(); - last = (id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT; + /* This is the HPET channel number which is zero based */ + channels = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; -#ifdef CONFIG_HPET_EMULATE_RTC /* * The legacy routing mode needs at least two channels, tick timer * and the rtc emulation channel. 
*/ - if (!last) + if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC) && channels < 2) goto out_nohpet; -#endif + hc = kcalloc(channels, sizeof(*hc), GFP_KERNEL); + if (!hc) { + pr_warn("Disabling HPET.\n"); + goto out_nohpet; + } + hpet_base.channels = hc; + hpet_base.nr_channels = channels; + + /* Read, store and sanitize the global configuration */ cfg = hpet_readl(HPET_CFG); - hpet_boot_cfg = kmalloc_array(last + 2, sizeof(*hpet_boot_cfg), - GFP_KERNEL); - if (hpet_boot_cfg) - *hpet_boot_cfg = cfg; - else - pr_warn("HPET initial state will not be saved\n"); + hpet_base.boot_cfg = cfg; cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); hpet_writel(cfg, HPET_CFG); if (cfg) - pr_warn("Unrecognized bits %#x set in global cfg\n", cfg); + pr_warn("Global config: Unknown bits %#x\n", cfg); + + /* Read, store and sanitize the per channel configuration */ + for (i = 0; i < channels; i++, hc++) { + hc->num = i; - for (i = 0; i <= last; ++i) { cfg = hpet_readl(HPET_Tn_CFG(i)); - if (hpet_boot_cfg) - hpet_boot_cfg[i + 1] = cfg; + hc->boot_cfg = cfg; + irq = (cfg & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; + hc->irq = irq; + cfg &= ~(HPET_TN_ENABLE | HPET_TN_LEVEL | HPET_TN_FSB); hpet_writel(cfg, HPET_Tn_CFG(i)); + cfg &= ~(HPET_TN_PERIODIC | HPET_TN_PERIODIC_CAP | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE | HPET_TN_FSB | HPET_TN_FSB_CAP); if (cfg) - pr_warn("Unrecognized bits %#x set in cfg#%u\n", - cfg, i); + pr_warn("Channel #%u config: Unknown bits %#x\n", i, cfg); } hpet_print_config(); - if (hpet_clocksource_register()) - goto out_nohpet; + clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); if (id & HPET_ID_LEGSUP) { - hpet_legacy_clockevent_register(); + hpet_legacy_clockevent_register(&hpet_base.channels[0]); + hpet_base.channels[0].mode = HPET_MODE_LEGACY; + if (IS_ENABLED(CONFIG_HPET_EMULATE_RTC)) + hpet_base.channels[1].mode = HPET_MODE_LEGACY; return 1; } return 0; out_nohpet: + kfree(hpet_base.channels); + hpet_base.channels = NULL; + hpet_base.nr_channels = 0; hpet_clear_mapping(); hpet_address = 0; return 0; } /* - * Needs to be late, as the reserve_timer code calls kalloc ! + * The late initialization runs after the PCI quirks have been invoked + * which might have detected a system on which the HPET can be enforced. + * + * Also, the MSI machinery is not working yet when the HPET is initialized + * early. * - * Not a problem on i386 as hpet_enable is called from late_time_init, - * but on x86_64 it is necessary ! 
+ * If the HPET is enabled, then: + * + * 1) Reserve one channel for /dev/hpet if CONFIG_HPET=y + * 2) Reserve up to num_possible_cpus() channels as per CPU clockevents + * 3) Setup /dev/hpet if CONFIG_HPET=y + * 4) Register hotplug callbacks when clockevents are available */ static __init int hpet_late_init(void) { int ret; - if (boot_hpet_disable) - return -ENODEV; - if (!hpet_address) { if (!force_hpet_address) return -ENODEV; @@ -1031,21 +945,14 @@ static __init int hpet_late_init(void) if (!hpet_virt_address) return -ENODEV; - if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP) - hpet_msi_capability_lookup(2); - else - hpet_msi_capability_lookup(0); - - hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + hpet_select_device_channel(); + hpet_select_clockevents(); + hpet_reserve_platform_timers(); hpet_print_config(); - if (hpet_msi_disable) + if (!hpet_base.nr_clockevents) return 0; - if (boot_cpu_has(X86_FEATURE_ARAT)) - return 0; - - /* This notifier should be called after workqueue is ready */ ret = cpuhp_setup_state(CPUHP_AP_X86_HPET_ONLINE, "x86/hpet:online", hpet_cpuhp_online, NULL); if (ret) @@ -1064,47 +971,47 @@ fs_initcall(hpet_late_init); void hpet_disable(void) { - if (is_hpet_capable() && hpet_virt_address) { - unsigned int cfg = hpet_readl(HPET_CFG), id, last; - - if (hpet_boot_cfg) - cfg = *hpet_boot_cfg; - else if (hpet_legacy_int_enabled) { - cfg &= ~HPET_CFG_LEGACY; - hpet_legacy_int_enabled = false; - } - cfg &= ~HPET_CFG_ENABLE; - hpet_writel(cfg, HPET_CFG); + unsigned int i; + u32 cfg; - if (!hpet_boot_cfg) - return; + if (!is_hpet_capable() || !hpet_virt_address) + return; - id = hpet_readl(HPET_ID); - last = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); + /* Restore boot configuration with the enable bit cleared */ + cfg = hpet_base.boot_cfg; + cfg &= ~HPET_CFG_ENABLE; + hpet_writel(cfg, HPET_CFG); - for (id = 0; id <= last; ++id) - hpet_writel(hpet_boot_cfg[id + 1], HPET_Tn_CFG(id)); + /* Restore the channel boot configuration */ + for (i = 0; i < hpet_base.nr_channels; i++) + hpet_writel(hpet_base.channels[i].boot_cfg, HPET_Tn_CFG(i)); - if (*hpet_boot_cfg & HPET_CFG_ENABLE) - hpet_writel(*hpet_boot_cfg, HPET_CFG); - } + /* If the HPET was enabled at boot time, reenable it */ + if (hpet_base.boot_cfg & HPET_CFG_ENABLE) + hpet_writel(hpet_base.boot_cfg, HPET_CFG); } #ifdef CONFIG_HPET_EMULATE_RTC -/* HPET in LegacyReplacement Mode eats up RTC interrupt line. When, HPET +/* + * HPET in LegacyReplacement mode eats up the RTC interrupt line. When HPET * is enabled, we support RTC interrupt functionality in software. + * * RTC has 3 kinds of interrupts: - * 1) Update Interrupt - generate an interrupt, every sec, when RTC clock - * is updated - * 2) Alarm Interrupt - generate an interrupt at a specific time of day - * 3) Periodic Interrupt - generate periodic interrupt, with frequencies - * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all freqs in powers of 2) - * (1) and (2) above are implemented using polling at a frequency of - * 64 Hz. The exact frequency is a tradeoff between accuracy and interrupt - * overhead. (DEFAULT_RTC_INT_FREQ) - * For (3), we use interrupts at 64Hz or user specified periodic - * frequency, whichever is higher. 
+ * + * 1) Update Interrupt - generate an interrupt, every second, when the + * RTC clock is updated + * 2) Alarm Interrupt - generate an interrupt at a specific time of day + * 3) Periodic Interrupt - generate periodic interrupt, with frequencies + * 2Hz-8192Hz (2Hz-64Hz for non-root user) (all frequencies in powers of 2) + * + * (1) and (2) above are implemented using polling at a frequency of 64 Hz: + * DEFAULT_RTC_INT_FREQ. + * + * The exact frequency is a tradeoff between accuracy and interrupt overhead. + * + * For (3), we use interrupts at 64 Hz, or the user specified periodic frequency, + * if it's higher. */ #include <linux/mc146818rtc.h> #include <linux/rtc.h> @@ -1125,7 +1032,7 @@ static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; /* - * Check that the hpet counter c1 is ahead of the c2 + * Check that the HPET counter c1 is ahead of c2 */ static inline int hpet_cnt_ahead(u32 c1, u32 c2) { @@ -1163,8 +1070,8 @@ void hpet_unregister_irq_handler(rtc_irq_handler handler) EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); /* - * Timer 1 for RTC emulation. We use one shot mode, as periodic mode - * is not supported by all HPET implementations for timer 1. + * Channel 1 for RTC emulation. We use one shot mode, as periodic mode + * is not supported by all HPET implementations for channel 1. * * hpet_rtc_timer_init() is called when the rtc is initialized. */ @@ -1177,10 +1084,11 @@ int hpet_rtc_timer_init(void) return 0; if (!hpet_default_delta) { + struct clock_event_device *evt = &hpet_base.channels[0].evt; uint64_t clc; - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; - clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; + clc = (uint64_t) evt->mult * NSEC_PER_SEC; + clc >>= evt->shift + DEFAULT_RTC_SHIFT; hpet_default_delta = clc; } @@ -1209,6 +1117,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init); static void hpet_disable_rtc_channel(void) { u32 cfg = hpet_readl(HPET_T1_CFG); + cfg &= ~HPET_TN_ENABLE; hpet_writel(cfg, HPET_T1_CFG); } @@ -1250,8 +1159,7 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask) } EXPORT_SYMBOL_GPL(hpet_set_rtc_irq_bit); -int hpet_set_alarm_time(unsigned char hrs, unsigned char min, - unsigned char sec) +int hpet_set_alarm_time(unsigned char hrs, unsigned char min, unsigned char sec) { if (!is_hpet_enabled()) return 0; @@ -1271,15 +1179,18 @@ int hpet_set_periodic_freq(unsigned long freq) if (!is_hpet_enabled()) return 0; - if (freq <= DEFAULT_RTC_INT_FREQ) + if (freq <= DEFAULT_RTC_INT_FREQ) { hpet_pie_limit = DEFAULT_RTC_INT_FREQ / freq; - else { - clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; + } else { + struct clock_event_device *evt = &hpet_base.channels[0].evt; + + clc = (uint64_t) evt->mult * NSEC_PER_SEC; do_div(clc, freq); - clc >>= hpet_clockevent.shift; + clc >>= evt->shift; hpet_pie_delta = clc; hpet_pie_limit = 0; } + return 1; } EXPORT_SYMBOL_GPL(hpet_set_periodic_freq); @@ -1317,8 +1228,7 @@ static void hpet_rtc_timer_reinit(void) if (hpet_rtc_flags & RTC_PIE) hpet_pie_count += lost_ints; if (printk_ratelimit()) - printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n", - lost_ints); + pr_warn("Lost %d RTC interrupts\n", lost_ints); } } @@ -1340,8 +1250,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) hpet_prev_update_sec = curr_time.tm_sec; } - if (hpet_rtc_flags & RTC_PIE && - ++hpet_pie_count >= hpet_pie_limit) { + if (hpet_rtc_flags & RTC_PIE && ++hpet_pie_count >= hpet_pie_limit) { rtc_int_flag |= RTC_PF; hpet_pie_count = 0; } @@ -1350,7 +1259,7 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) 
(curr_time.tm_sec == hpet_alarm_time.tm_sec) && (curr_time.tm_min == hpet_alarm_time.tm_min) && (curr_time.tm_hour == hpet_alarm_time.tm_hour)) - rtc_int_flag |= RTC_AF; + rtc_int_flag |= RTC_AF; if (rtc_int_flag) { rtc_int_flag |= (RTC_IRQF | (RTC_NUM_INTS << 8)); diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 0d307a657abb..2b7999a1a50a 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -8,6 +8,7 @@ #include <linux/timex.h> #include <linux/i8253.h> +#include <asm/apic.h> #include <asm/hpet.h> #include <asm/time.h> #include <asm/smp.h> @@ -18,10 +19,32 @@ */ struct clock_event_device *global_clock_event; -void __init setup_pit_timer(void) +/* + * Modern chipsets can disable the PIT clock which makes it unusable. It + * would be possible to enable the clock but the registers are chipset + * specific and not discoverable. Avoid the whack a mole game. + * + * These platforms have discoverable TSC/CPU frequencies but this also + * requires to know the local APIC timer frequency as it normally is + * calibrated against the PIT interrupt. + */ +static bool __init use_pit(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC)) + return true; + + /* This also returns true when APIC is disabled */ + return apic_needs_pit(); +} + +bool __init pit_timer_init(void) { + if (!use_pit()) + return false; + clockevent_i8253_init(true); global_clock_event = &i8253_clockevent; + return true; } #ifndef CONFIG_X86_64 diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index d2482bbbe3d0..87ef69a72c52 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -319,7 +319,8 @@ void __init idt_setup_apic_and_irq_gates(void) #ifdef CONFIG_X86_LOCAL_APIC for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { set_bit(i, system_vectors); - set_intr_gate(i, spurious_interrupt); + entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); + set_intr_gate(i, entry); } #endif } diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index 64b973f0e985..4c407833faca 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -11,10 +11,11 @@ extern struct boot_params boot_params; static enum efi_secureboot_mode get_sb_mode(void) { efi_char16_t efi_SecureBoot_name[] = L"SecureBoot"; + efi_char16_t efi_SetupMode_name[] = L"SecureBoot"; efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; efi_status_t status; unsigned long size; - u8 secboot; + u8 secboot, setupmode; size = sizeof(secboot); @@ -36,7 +37,14 @@ static enum efi_secureboot_mode get_sb_mode(void) return efi_secureboot_mode_unknown; } - if (secboot == 0) { + size = sizeof(setupmode); + status = efi.get_variable(efi_SetupMode_name, &efi_variable_guid, + NULL, &size, &setupmode); + + if (status != EFI_SUCCESS) /* ignore unknown SetupMode */ + setupmode = 0; + + if (secboot == 0 || setupmode == 1) { pr_info("ima: secureboot mode disabled\n"); return efi_secureboot_mode_disabled; } diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 805b7a341aca..fdb6506ceaaa 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -13,7 +13,22 @@ #include <linux/dmi.h> #include <linux/io.h> -int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE; +#define IO_DELAY_TYPE_0X80 0 +#define IO_DELAY_TYPE_0XED 1 +#define IO_DELAY_TYPE_UDELAY 2 +#define IO_DELAY_TYPE_NONE 3 + +#if defined(CONFIG_IO_DELAY_0X80) +#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_0X80 +#elif defined(CONFIG_IO_DELAY_0XED) +#define DEFAULT_IO_DELAY_TYPE 
IO_DELAY_TYPE_0XED +#elif defined(CONFIG_IO_DELAY_UDELAY) +#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_UDELAY +#elif defined(CONFIG_IO_DELAY_NONE) +#define DEFAULT_IO_DELAY_TYPE IO_DELAY_TYPE_NONE +#endif + +int io_delay_type __read_mostly = DEFAULT_IO_DELAY_TYPE; static int __initdata io_delay_override; @@ -24,13 +39,13 @@ void native_io_delay(void) { switch (io_delay_type) { default: - case CONFIG_IO_DELAY_TYPE_0X80: + case IO_DELAY_TYPE_0X80: asm volatile ("outb %al, $0x80"); break; - case CONFIG_IO_DELAY_TYPE_0XED: + case IO_DELAY_TYPE_0XED: asm volatile ("outb %al, $0xed"); break; - case CONFIG_IO_DELAY_TYPE_UDELAY: + case IO_DELAY_TYPE_UDELAY: /* * 2 usecs is an upper-bound for the outb delay but * note that udelay doesn't have the bus-level @@ -39,7 +54,8 @@ void native_io_delay(void) * are shorter until calibrated): */ udelay(2); - case CONFIG_IO_DELAY_TYPE_NONE: + break; + case IO_DELAY_TYPE_NONE: break; } } @@ -47,9 +63,9 @@ EXPORT_SYMBOL(native_io_delay); static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id) { - if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) { + if (io_delay_type == IO_DELAY_TYPE_0X80) { pr_notice("%s: using 0xed I/O delay port\n", id->ident); - io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; + io_delay_type = IO_DELAY_TYPE_0XED; } return 0; @@ -115,13 +131,13 @@ static int __init io_delay_param(char *s) return -EINVAL; if (!strcmp(s, "0x80")) - io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; + io_delay_type = IO_DELAY_TYPE_0X80; else if (!strcmp(s, "0xed")) - io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; + io_delay_type = IO_DELAY_TYPE_0XED; else if (!strcmp(s, "udelay")) - io_delay_type = CONFIG_IO_DELAY_TYPE_UDELAY; + io_delay_type = IO_DELAY_TYPE_UDELAY; else if (!strcmp(s, "none")) - io_delay_type = CONFIG_IO_DELAY_TYPE_NONE; + io_delay_type = IO_DELAY_TYPE_NONE; else return -EINVAL; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9b68b5b00ac9..4215653f8a8e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -135,7 +135,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); seq_puts(p, " Machine check polls\n"); #endif -#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) +#ifdef CONFIG_X86_HV_CALLBACK_VECTOR if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) @@ -247,7 +247,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) if (!handle_irq(desc, regs)) { ack_APIC_irq(); - if (desc != VECTOR_RETRIGGERED) { + if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) { pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n", __func__, smp_processor_id(), vector); diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 1b2ee55a2dfb..6857b4577f17 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -45,7 +45,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now) static void __init jailhouse_timer_init(void) { - lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ); + lapic_timer_period = setup_data.apic_khz * (1000 / HZ); } static unsigned long jailhouse_get_tsc(void) @@ -203,7 +203,7 @@ bool jailhouse_paravirt(void) return jailhouse_cpuid_base() != 0; } -static bool jailhouse_x2apic_available(void) +static bool __init jailhouse_x2apic_available(void) { /* * The x2APIC is only available if the root cell enabled it. 
Jailhouse diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index e631c358f7f4..044053235302 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -35,41 +35,43 @@ static void bug_at(unsigned char *ip, int line) BUG(); } -static void __ref __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - int init) +static void __jump_label_set_jump_code(struct jump_entry *entry, + enum jump_label_type type, + union jump_code_union *code, + int init) { - union jump_code_union jmp; const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - const void *expect, *code; + const void *expect; int line; - jmp.jump = 0xe9; - jmp.offset = jump_entry_target(entry) - - (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + code->jump = 0xe9; + code->offset = jump_entry_target(entry) - + (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); - if (type == JUMP_LABEL_JMP) { - if (init) { - expect = default_nop; line = __LINE__; - } else { - expect = ideal_nop; line = __LINE__; - } - - code = &jmp.code; + if (init) { + expect = default_nop; line = __LINE__; + } else if (type == JUMP_LABEL_JMP) { + expect = ideal_nop; line = __LINE__; } else { - if (init) { - expect = default_nop; line = __LINE__; - } else { - expect = &jmp.code; line = __LINE__; - } - - code = ideal_nop; + expect = code->code; line = __LINE__; } if (memcmp((void *)jump_entry_code(entry), expect, JUMP_LABEL_NOP_SIZE)) bug_at((void *)jump_entry_code(entry), line); + if (type == JUMP_LABEL_NOP) + memcpy(code, ideal_nop, JUMP_LABEL_NOP_SIZE); +} + +static void __ref __jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) +{ + union jump_code_union code; + + __jump_label_set_jump_code(entry, type, &code, init); + /* * As long as only a single processor is running and the code is still * not marked as RO, text_poke_early() can be used; Checking that @@ -82,12 +84,12 @@ static void __ref __jump_label_transform(struct jump_entry *entry, * always nop being the 'currently valid' instruction */ if (init || system_state == SYSTEM_BOOTING) { - text_poke_early((void *)jump_entry_code(entry), code, + text_poke_early((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE); return; } - text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE, + text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); } @@ -99,6 +101,75 @@ void arch_jump_label_transform(struct jump_entry *entry, mutex_unlock(&text_mutex); } +#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc)) +static struct text_poke_loc tp_vec[TP_VEC_MAX]; +static int tp_vec_nr; + +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) +{ + struct text_poke_loc *tp; + void *entry_code; + + if (system_state == SYSTEM_BOOTING) { + /* + * Fallback to the non-batching mode. + */ + arch_jump_label_transform(entry, type); + return true; + } + + /* + * No more space in the vector, tell upper layer to apply + * the queue before continuing. + */ + if (tp_vec_nr == TP_VEC_MAX) + return false; + + tp = &tp_vec[tp_vec_nr]; + + entry_code = (void *)jump_entry_code(entry); + + /* + * The INT3 handler will do a bsearch in the queue, so we need entries + * to be sorted. We can survive an unsorted list by rejecting the entry, + * forcing the generic jump_label code to apply the queue. 
Warning once, + * to raise the attention to the case of an unsorted entry that is + * better not happen, because, in the worst case we will perform in the + * same way as we do without batching - with some more overhead. + */ + if (tp_vec_nr > 0) { + int prev = tp_vec_nr - 1; + struct text_poke_loc *prev_tp = &tp_vec[prev]; + + if (WARN_ON_ONCE(prev_tp->addr > entry_code)) + return false; + } + + __jump_label_set_jump_code(entry, type, + (union jump_code_union *) &tp->opcode, 0); + + tp->addr = entry_code; + tp->detour = entry_code + JUMP_LABEL_NOP_SIZE; + tp->len = JUMP_LABEL_NOP_SIZE; + + tp_vec_nr++; + + return true; +} + +void arch_jump_label_transform_apply(void) +{ + if (!tp_vec_nr) + return; + + mutex_lock(&text_mutex); + text_poke_bp_batch(tp_vec, tp_vec_nr); + mutex_unlock(&text_mutex); + + tp_vec_nr = 0; +} + static enum { JL_STATE_START, JL_STATE_NO_UPDATE, diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 7670ac2bda3a..edaa30b20841 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -67,33 +67,18 @@ static const struct file_operations fops_setup_data = { .llseek = default_llseek, }; -static int __init +static void __init create_setup_data_node(struct dentry *parent, int no, struct setup_data_node *node) { - struct dentry *d, *type, *data; + struct dentry *d; char buf[16]; sprintf(buf, "%d", no); d = debugfs_create_dir(buf, parent); - if (!d) - return -ENOMEM; - - type = debugfs_create_x32("type", S_IRUGO, d, &node->type); - if (!type) - goto err_dir; - - data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data); - if (!data) - goto err_type; - return 0; - -err_type: - debugfs_remove(type); -err_dir: - debugfs_remove(d); - return -ENOMEM; + debugfs_create_x32("type", S_IRUGO, d, &node->type); + debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data); } static int __init create_setup_data_nodes(struct dentry *parent) @@ -106,8 +91,6 @@ static int __init create_setup_data_nodes(struct dentry *parent) int no = 0; d = debugfs_create_dir("setup_data", parent); - if (!d) - return -ENOMEM; pa_data = boot_params.hdr.setup_data; @@ -128,19 +111,17 @@ static int __init create_setup_data_nodes(struct dentry *parent) node->paddr = pa_data; node->type = data->type; node->len = data->len; - error = create_setup_data_node(d, no, node); + create_setup_data_node(d, no, node); pa_data = data->next; memunmap(data); - if (error) - goto err_dir; no++; } return 0; err_dir: - debugfs_remove(d); + debugfs_remove_recursive(d); return error; } @@ -151,35 +132,18 @@ static struct debugfs_blob_wrapper boot_params_blob = { static int __init boot_params_kdebugfs_init(void) { - struct dentry *dbp, *version, *data; - int error = -ENOMEM; + struct dentry *dbp; + int error; dbp = debugfs_create_dir("boot_params", arch_debugfs_dir); - if (!dbp) - return -ENOMEM; - - version = debugfs_create_x16("version", S_IRUGO, dbp, - &boot_params.hdr.version); - if (!version) - goto err_dir; - data = debugfs_create_blob("data", S_IRUGO, dbp, - &boot_params_blob); - if (!data) - goto err_version; + debugfs_create_x16("version", S_IRUGO, dbp, &boot_params.hdr.version); + debugfs_create_blob("data", S_IRUGO, dbp, &boot_params_blob); error = create_setup_data_nodes(dbp); if (error) - goto err_data; + debugfs_remove_recursive(dbp); - return 0; - -err_data: - debugfs_remove(data); -err_version: - debugfs_remove(version); -err_dir: - debugfs_remove(dbp); return error; } #endif /* CONFIG_DEBUG_BOOT_PARAMS */ @@ -189,8 +153,6 @@ static int __init 
arch_kdebugfs_init(void) int error = 0; arch_debugfs_dir = debugfs_create_dir("x86", NULL); - if (!arch_debugfs_dir) - return -ENOMEM; #ifdef CONFIG_DEBUG_BOOT_PARAMS error = boot_params_kdebugfs_init(); diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index f03237e3f192..5ebcd02cbca7 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -319,6 +319,11 @@ static int bzImage64_probe(const char *buf, unsigned long len) return ret; } + if (!(header->xloadflags & XLF_5LEVEL) && pgtable_l5_enabled()) { + pr_err("bzImage cannot handle 5-level paging mode.\n"); + return ret; + } + /* I've got a bzImage */ pr_debug("It's a relocatable bzImage64\n"); ret = 0; @@ -414,7 +419,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, efi_map_offset = params_cmdline_sz; efi_setup_data_offset = efi_map_offset + ALIGN(efi_map_sz, 16); - /* Copy setup header onto bootparams. Documentation/x86/boot.txt */ + /* Copy setup header onto bootparams. Documentation/x86/boot.rst */ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset; /* Is there a limit on setup header size? */ diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 6690c5652aeb..23297ea64f5f 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) #ifdef CONFIG_X86_32 switch (regno) { - case GDB_SS: - if (!user_mode(regs)) - *(unsigned long *)mem = __KERNEL_DS; - break; - case GDB_SP: - if (!user_mode(regs)) - *(unsigned long *)mem = kernel_stack_pointer(regs); - break; case GDB_GS: case GDB_FS: *(unsigned long *)mem = 0xFFFF; diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 2b949f4fd4d8..7d3a2e2daf01 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -5,15 +5,10 @@ /* Kprobes and Optprobes common header */ #include <asm/asm.h> - -#ifdef CONFIG_FRAME_POINTER -# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \ - " mov %" _ASM_SP ", %" _ASM_BP "\n" -#else -# define SAVE_RBP_STRING " push %" _ASM_BP "\n" -#endif +#include <asm/frame.h> #ifdef CONFIG_X86_64 + #define SAVE_REGS_STRING \ /* Skip cs, ip, orig_ax. */ \ " subq $24, %rsp\n" \ @@ -27,11 +22,13 @@ " pushq %r10\n" \ " pushq %r11\n" \ " pushq %rbx\n" \ - SAVE_RBP_STRING \ + " pushq %rbp\n" \ " pushq %r12\n" \ " pushq %r13\n" \ " pushq %r14\n" \ - " pushq %r15\n" + " pushq %r15\n" \ + ENCODE_FRAME_POINTER + #define RESTORE_REGS_STRING \ " popq %r15\n" \ " popq %r14\n" \ @@ -51,19 +48,22 @@ /* Skip orig_ax, ip, cs */ \ " addq $24, %rsp\n" #else + #define SAVE_REGS_STRING \ /* Skip cs, ip, orig_ax and gs. */ \ - " subl $16, %esp\n" \ + " subl $4*4, %esp\n" \ " pushl %fs\n" \ " pushl %es\n" \ " pushl %ds\n" \ " pushl %eax\n" \ - SAVE_RBP_STRING \ + " pushl %ebp\n" \ " pushl %edi\n" \ " pushl %esi\n" \ " pushl %edx\n" \ " pushl %ecx\n" \ - " pushl %ebx\n" + " pushl %ebx\n" \ + ENCODE_FRAME_POINTER + #define RESTORE_REGS_STRING \ " popl %ebx\n" \ " popl %ecx\n" \ @@ -72,8 +72,8 @@ " popl %edi\n" \ " popl %ebp\n" \ " popl %eax\n" \ - /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ - " addl $24, %esp\n" + /* Skip ds, es, fs, gs, orig_ax, ip, and cs. 
*/\ + " addl $7*4, %esp\n" #endif /* Ensure if the instruction can be boostable */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6afd8061dbae..0e0b08008b5a 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -56,7 +56,7 @@ DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) +#define stack_addr(regs) ((unsigned long *)regs->sp) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -718,29 +718,27 @@ asm( ".global kretprobe_trampoline\n" ".type kretprobe_trampoline, @function\n" "kretprobe_trampoline:\n" -#ifdef CONFIG_X86_64 /* We don't bother saving the ss register */ +#ifdef CONFIG_X86_64 " pushq %rsp\n" " pushfq\n" SAVE_REGS_STRING " movq %rsp, %rdi\n" " call trampoline_handler\n" /* Replace saved sp with true return address. */ - " movq %rax, 152(%rsp)\n" + " movq %rax, 19*8(%rsp)\n" RESTORE_REGS_STRING " popfq\n" #else - " pushf\n" + " pushl %esp\n" + " pushfl\n" SAVE_REGS_STRING " movl %esp, %eax\n" " call trampoline_handler\n" - /* Move flags to cs */ - " movl 56(%esp), %edx\n" - " movl %edx, 52(%esp)\n" - /* Replace saved flags with true return address. */ - " movl %eax, 56(%esp)\n" + /* Replace saved sp with true return address. */ + " movl %eax, 15*4(%esp)\n" RESTORE_REGS_STRING - " popf\n" + " popfl\n" #endif " ret\n" ".size kretprobe_trampoline, .-kretprobe_trampoline\n" @@ -781,16 +779,13 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); /* fixup registers */ -#ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; - /* On x86-64, we use pt_regs->sp for return address holder. */ - frame_pointer = ®s->sp; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); +#ifdef CONFIG_X86_32 + regs->cs |= get_kernel_rpl(); regs->gs = 0; - /* On x86-32, we use pt_regs->flags for return address holder. */ - frame_pointer = ®s->flags; #endif + /* We use pt_regs->sp for return address holder. */ + frame_pointer = ®s->sp; regs->ip = trampoline_address; regs->orig_ax = ~0UL; @@ -813,7 +808,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs) continue; /* * Return probes must be pushed on this hash list correct - * order (same as return order) so that it can be poped + * order (same as return order) so that it can be popped * correctly. 
However, if we find it is pushed it incorrect * order, this means we find a function which should not be * probed, because the wrong order entry is pushed on the diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 7c361a24c6df..9d4aedece363 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -102,14 +102,15 @@ asm ( "optprobe_template_call:\n" ASM_NOP5 /* Move flags to rsp */ - " movq 144(%rsp), %rdx\n" - " movq %rdx, 152(%rsp)\n" + " movq 18*8(%rsp), %rdx\n" + " movq %rdx, 19*8(%rsp)\n" RESTORE_REGS_STRING /* Skip flags entry */ " addq $8, %rsp\n" " popfq\n" #else /* CONFIG_X86_32 */ - " pushf\n" + " pushl %esp\n" + " pushfl\n" SAVE_REGS_STRING " movl %esp, %edx\n" ".global optprobe_template_val\n" @@ -118,9 +119,13 @@ asm ( ".global optprobe_template_call\n" "optprobe_template_call:\n" ASM_NOP5 + /* Move flags into esp */ + " movl 14*4(%esp), %edx\n" + " movl %edx, 15*4(%esp)\n" RESTORE_REGS_STRING - " addl $4, %esp\n" /* skip cs */ - " popf\n" + /* Skip flags entry */ + " addl $4, %esp\n" + " popfl\n" #endif ".global optprobe_template_end\n" "optprobe_template_end:\n" @@ -152,10 +157,9 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) } else { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); /* Save skipped registers */ -#ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); +#ifdef CONFIG_X86_32 + regs->cs |= get_kernel_rpl(); regs->gs = 0; #endif regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; @@ -418,7 +422,7 @@ err: void arch_optimize_kprobes(struct list_head *oplist) { struct optimized_kprobe *op, *tmp; - u8 insn_buf[RELATIVEJUMP_SIZE]; + u8 insn_buff[RELATIVEJUMP_SIZE]; list_for_each_entry_safe(op, tmp, oplist, list) { s32 rel = (s32)((long)op->optinsn.insn - @@ -430,10 +434,10 @@ void arch_optimize_kprobes(struct list_head *oplist) memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, RELATIVE_ADDR_SIZE); - insn_buf[0] = RELATIVEJUMP_OPCODE; - *(s32 *)(&insn_buf[1]) = rel; + insn_buff[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&insn_buff[1]) = rel; - text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, op->optinsn.insn); list_del_init(&op->list); @@ -443,12 +447,12 @@ void arch_optimize_kprobes(struct list_head *oplist) /* Replace a relative jump with a breakpoint (int3). */ void arch_unoptimize_kprobe(struct optimized_kprobe *op) { - u8 insn_buf[RELATIVEJUMP_SIZE]; + u8 insn_buff[RELATIVEJUMP_SIZE]; /* Set int3 to first byte for kprobes */ - insn_buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE, + insn_buff[0] = BREAKPOINT_INSTRUCTION; + memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, op->optinsn.insn); } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 5169b8cc35bb..82caf01b63dd 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -527,6 +527,21 @@ static void kvm_setup_pv_ipi(void) pr_info("KVM setup pv IPIs\n"); } +static void kvm_smp_send_call_func_ipi(const struct cpumask *mask) +{ + int cpu; + + native_send_call_func_ipi(mask); + + /* Make sure other vCPUs get a chance to run if they need to. 
*/ + for_each_cpu(cpu, mask) { + if (vcpu_is_preempted(cpu)) { + kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu)); + break; + } + } +} + static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) { native_smp_prepare_cpus(max_cpus); @@ -638,6 +653,12 @@ static void __init kvm_guest_init(void) #ifdef CONFIG_SMP smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; + if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi; + pr_info("KVM setup pv sched yield\n"); + } if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", kvm_cpu_online, kvm_cpu_down_prepare) < 0) pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n"); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index d7be2376ac0b..5dcd438ad8f2 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -16,6 +16,7 @@ #include <linux/io.h> #include <linux/suspend.h> #include <linux/vmalloc.h> +#include <linux/efi.h> #include <asm/init.h> #include <asm/pgtable.h> @@ -27,6 +28,55 @@ #include <asm/setup.h> #include <asm/set_memory.h> +#ifdef CONFIG_ACPI +/* + * Used while adding mapping for ACPI tables. + * Can be reused when other iomem regions need be mapped + */ +struct init_pgtable_data { + struct x86_mapping_info *info; + pgd_t *level4p; +}; + +static int mem_region_callback(struct resource *res, void *arg) +{ + struct init_pgtable_data *data = arg; + unsigned long mstart, mend; + + mstart = res->start; + mend = mstart + resource_size(res) - 1; + + return kernel_ident_mapping_init(data->info, data->level4p, mstart, mend); +} + +static int +map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) +{ + struct init_pgtable_data data; + unsigned long flags; + int ret; + + data.info = info; + data.level4p = level4p; + flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, + &data, mem_region_callback); + if (ret && ret != -EINVAL) + return ret; + + /* ACPI tables could be located in ACPI Non-volatile Storage region */ + ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, + &data, mem_region_callback); + if (ret && ret != -EINVAL) + return ret; + + return 0; +} +#else +static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; } +#endif + #ifdef CONFIG_KEXEC_FILE const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_bzImage64_ops, @@ -34,6 +84,31 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { }; #endif +static int +map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p) +{ +#ifdef CONFIG_EFI + unsigned long mstart, mend; + + if (!efi_enabled(EFI_BOOT)) + return 0; + + mstart = (boot_params.efi_info.efi_systab | + ((u64)boot_params.efi_info.efi_systab_hi<<32)); + + if (efi_enabled(EFI_64BIT)) + mend = mstart + sizeof(efi_system_table_64_t); + else + mend = mstart + sizeof(efi_system_table_32_t); + + if (!mstart) + return 0; + + return kernel_ident_mapping_init(info, level4p, mstart, mend); +#endif + return 0; +} + static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.p4d); @@ -48,12 +123,13 @@ static void free_transition_pgtable(struct kimage *image) static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) { + pgprot_t prot = 
PAGE_KERNEL_EXEC_NOENC; + unsigned long vaddr, paddr; + int result = -ENOMEM; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; - unsigned long vaddr, paddr; - int result = -ENOMEM; vaddr = (unsigned long)relocate_kernel; paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); @@ -90,7 +166,11 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); } pte = pte_offset_kernel(pmd, vaddr); - set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC)); + + if (sev_active()) + prot = PAGE_KERNEL_EXEC; + + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); return 0; err: return result; @@ -127,6 +207,11 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) level4p = (pgd_t *)__va(start_pgtable); clear_page(level4p); + if (sev_active()) { + info.page_flag |= _PAGE_ENC; + info.kernpg_flag |= _PAGE_ENC; + } + if (direct_gbpages) info.direct_gbpages = true; @@ -157,6 +242,18 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) return result; } + /* + * Prepare EFI systab and ACPI tables for kexec kernel since they are + * not covered by pfn_mapped. + */ + result = map_efi_systab(&info, level4p); + if (result) + return result; + + result = map_acpi_tables(&info, level4p); + if (result) + return result; + return init_transition_pgtable(image, level4p); } @@ -557,8 +654,20 @@ void arch_kexec_unprotect_crashkres(void) kexec_mark_crashkres(false); } +/* + * During a traditional boot under SME, SME will encrypt the kernel, + * so the SME kexec kernel also needs to be un-encrypted in order to + * replicate a normal SME boot. + * + * During a traditional boot under SEV, the kernel has already been + * loaded encrypted, so the SEV kexec kernel needs to be encrypted in + * order to replicate a normal SEV boot. + */ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { + if (sev_active()) + return 0; + /* * If SME is active we need to be sure that kexec pages are * not encrypted because when we boot to the new kernel the @@ -569,6 +678,9 @@ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { + if (sev_active()) + return; + /* * If SME is active we need to reset the pages back to being * an encrypted mapping before freeing them. 
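The SME/SEV comments above come down to a single decision about the encryption bit on the kexec control page and identity-map entries: an SME host boot must hand over decrypted pages (the encryption attribute is cleared and PAGE_KERNEL_EXEC_NOENC is used), while an SEV guest must keep everything encrypted (the pages are left alone and _PAGE_ENC / PAGE_KERNEL_EXEC are used). As a minimal sketch, the protection choice made in init_transition_pgtable() reduces to the helper below; kexec_control_page_prot() is a hypothetical name used only for illustration and is not part of the patch:

	static inline pgprot_t kexec_control_page_prot(void)
	{
		/* SEV guest: keep the C-bit; SME host: map the page unencrypted */
		return sev_active() ? PAGE_KERNEL_EXEC : PAGE_KERNEL_EXEC_NOENC;
	}
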
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 06f6bb48d018..98039d7fb998 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -58,24 +58,24 @@ struct branch { u32 delta; } __attribute__((packed)); -static unsigned paravirt_patch_call(void *insnbuf, const void *target, +static unsigned paravirt_patch_call(void *insn_buff, const void *target, unsigned long addr, unsigned len) { - struct branch *b = insnbuf; - unsigned long delta = (unsigned long)target - (addr+5); - - if (len < 5) { -#ifdef CONFIG_RETPOLINE - WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr); -#endif - return len; /* call too long for patch site */ + const int call_len = 5; + struct branch *b = insn_buff; + unsigned long delta = (unsigned long)target - (addr+call_len); + + if (len < call_len) { + pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr); + /* Kernel might not be viable if patching fails, bail out: */ + BUG_ON(1); } b->opcode = 0xe8; /* call */ b->delta = delta; - BUILD_BUG_ON(sizeof(*b) != 5); + BUILD_BUG_ON(sizeof(*b) != call_len); - return 5; + return call_len; } #ifdef CONFIG_PARAVIRT_XXL @@ -85,10 +85,10 @@ u64 notrace _paravirt_ident_64(u64 x) return x; } -static unsigned paravirt_patch_jmp(void *insnbuf, const void *target, +static unsigned paravirt_patch_jmp(void *insn_buff, const void *target, unsigned long addr, unsigned len) { - struct branch *b = insnbuf; + struct branch *b = insn_buff; unsigned long delta = (unsigned long)target - (addr+5); if (len < 5) { @@ -113,7 +113,7 @@ void __init native_pv_lock_init(void) static_branch_disable(&virt_spin_lock_key); } -unsigned paravirt_patch_default(u8 type, void *insnbuf, +unsigned paravirt_patch_default(u8 type, void *insn_buff, unsigned long addr, unsigned len) { /* @@ -125,36 +125,36 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf, if (opfunc == NULL) /* If there's no function, patch it with a ud2a (BUG) */ - ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); + ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a)); else if (opfunc == _paravirt_nop) ret = 0; #ifdef CONFIG_PARAVIRT_XXL /* identity functions just return their single argument */ else if (opfunc == _paravirt_ident_64) - ret = paravirt_patch_ident_64(insnbuf, len); + ret = paravirt_patch_ident_64(insn_buff, len); else if (type == PARAVIRT_PATCH(cpu.iret) || type == PARAVIRT_PATCH(cpu.usergs_sysret64)) /* If operation requires a jmp, then jmp */ - ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len); + ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len); #endif else /* Otherwise call the function. 
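For a concrete feel of what paravirt_patch_call() above emits, take made-up addresses: patching a site at addr = 0xffffffff81001000 to call a target at 0xffffffff81002000 gives delta = target - (addr + call_len) = 0x1000 - 5 = 0xffb, so the five bytes written into the site are:

	e8 fb 0f 00 00		/* call rel32, rel32 = 0x00000ffb */

i.e. a standard near CALL whose displacement is relative to the first byte after the instruction, which is exactly what the addr + call_len term accounts for.
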
*/ - ret = paravirt_patch_call(insnbuf, opfunc, addr, len); + ret = paravirt_patch_call(insn_buff, opfunc, addr, len); return ret; } -unsigned paravirt_patch_insns(void *insnbuf, unsigned len, +unsigned paravirt_patch_insns(void *insn_buff, unsigned len, const char *start, const char *end) { unsigned insn_len = end - start; - if (insn_len > len || start == NULL) - insn_len = len; - else - memcpy(insnbuf, start, insn_len); + /* Alternative instruction is too large for the patch site and we cannot continue: */ + BUG_ON(insn_len > len || start == NULL); + + memcpy(insn_buff, start, insn_len); return insn_len; } diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c new file mode 100644 index 000000000000..3eff63c090d2 --- /dev/null +++ b/arch/x86/kernel/paravirt_patch.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/stringify.h> + +#include <asm/paravirt.h> +#include <asm/asm-offsets.h> + +#define PSTART(d, m) \ + patch_data_##d.m + +#define PEND(d, m) \ + (PSTART(d, m) + sizeof(patch_data_##d.m)) + +#define PATCH(d, m, insn_buff, len) \ + paravirt_patch_insns(insn_buff, len, PSTART(d, m), PEND(d, m)) + +#define PATCH_CASE(ops, m, data, insn_buff, len) \ + case PARAVIRT_PATCH(ops.m): \ + return PATCH(data, ops##_##m, insn_buff, len) + +#ifdef CONFIG_PARAVIRT_XXL +struct patch_xxl { + const unsigned char irq_irq_disable[1]; + const unsigned char irq_irq_enable[1]; + const unsigned char irq_save_fl[2]; + const unsigned char mmu_read_cr2[3]; + const unsigned char mmu_read_cr3[3]; + const unsigned char mmu_write_cr3[3]; + const unsigned char irq_restore_fl[2]; +# ifdef CONFIG_X86_64 + const unsigned char cpu_wbinvd[2]; + const unsigned char cpu_usergs_sysret64[6]; + const unsigned char cpu_swapgs[3]; + const unsigned char mov64[3]; +# else + const unsigned char cpu_iret[1]; +# endif +}; + +static const struct patch_xxl patch_data_xxl = { + .irq_irq_disable = { 0xfa }, // cli + .irq_irq_enable = { 0xfb }, // sti + .irq_save_fl = { 0x9c, 0x58 }, // pushf; pop %[re]ax + .mmu_read_cr2 = { 0x0f, 0x20, 0xd0 }, // mov %cr2, %[re]ax + .mmu_read_cr3 = { 0x0f, 0x20, 0xd8 }, // mov %cr3, %[re]ax +# ifdef CONFIG_X86_64 + .mmu_write_cr3 = { 0x0f, 0x22, 0xdf }, // mov %rdi, %cr3 + .irq_restore_fl = { 0x57, 0x9d }, // push %rdi; popfq + .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd + .cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8, + 0x48, 0x0f, 0x07 }, // swapgs; sysretq + .cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs + .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax +# else + .mmu_write_cr3 = { 0x0f, 0x22, 0xd8 }, // mov %eax, %cr3 + .irq_restore_fl = { 0x50, 0x9d }, // push %eax; popf + .cpu_iret = { 0xcf }, // iret +# endif +}; + +unsigned int paravirt_patch_ident_64(void *insn_buff, unsigned int len) +{ +#ifdef CONFIG_X86_64 + return PATCH(xxl, mov64, insn_buff, len); +#endif + return 0; +} +# endif /* CONFIG_PARAVIRT_XXL */ + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +struct patch_lock { + unsigned char queued_spin_unlock[3]; + unsigned char vcpu_is_preempted[2]; +}; + +static const struct patch_lock patch_data_lock = { + .vcpu_is_preempted = { 0x31, 0xc0 }, // xor %eax, %eax + +# ifdef CONFIG_X86_64 + .queued_spin_unlock = { 0xc6, 0x07, 0x00 }, // movb $0, (%rdi) +# else + .queued_spin_unlock = { 0xc6, 0x00, 0x00 }, // movb $0, (%eax) +# endif +}; +#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr, + unsigned int len) +{ + switch (type) { + +#ifdef CONFIG_PARAVIRT_XXL + PATCH_CASE(irq, restore_fl, xxl, 
insn_buff, len); + PATCH_CASE(irq, save_fl, xxl, insn_buff, len); + PATCH_CASE(irq, irq_enable, xxl, insn_buff, len); + PATCH_CASE(irq, irq_disable, xxl, insn_buff, len); + + PATCH_CASE(mmu, read_cr2, xxl, insn_buff, len); + PATCH_CASE(mmu, read_cr3, xxl, insn_buff, len); + PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len); + +# ifdef CONFIG_X86_64 + PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len); + PATCH_CASE(cpu, swapgs, xxl, insn_buff, len); + PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len); +# else + PATCH_CASE(cpu, iret, xxl, insn_buff, len); +# endif +#endif + +#ifdef CONFIG_PARAVIRT_SPINLOCKS + case PARAVIRT_PATCH(lock.queued_spin_unlock): + if (pv_is_native_spin_unlock()) + return PATCH(lock, queued_spin_unlock, insn_buff, len); + break; + + case PARAVIRT_PATCH(lock.vcpu_is_preempted): + if (pv_is_native_vcpu_is_preempted()) + return PATCH(lock, vcpu_is_preempted, insn_buff, len); + break; +#endif + default: + break; + } + + return paravirt_patch_default(type, insn_buff, addr, len); +} diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c deleted file mode 100644 index de138d3912e4..000000000000 --- a/arch/x86/kernel/paravirt_patch_32.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <asm/paravirt.h> - -#ifdef CONFIG_PARAVIRT_XXL -DEF_NATIVE(irq, irq_disable, "cli"); -DEF_NATIVE(irq, irq_enable, "sti"); -DEF_NATIVE(irq, restore_fl, "push %eax; popf"); -DEF_NATIVE(irq, save_fl, "pushf; pop %eax"); -DEF_NATIVE(cpu, iret, "iret"); -DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax"); -DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3"); -DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax"); - -unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) -{ - /* arg in %edx:%eax, return in %edx:%eax */ - return 0; -} -#endif - -#if defined(CONFIG_PARAVIRT_SPINLOCKS) -DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)"); -DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); -#endif - -extern bool pv_is_native_spin_unlock(void); -extern bool pv_is_native_vcpu_is_preempted(void); - -unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len) -{ -#define PATCH_SITE(ops, x) \ - case PARAVIRT_PATCH(ops.x): \ - return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x) - - switch (type) { -#ifdef CONFIG_PARAVIRT_XXL - PATCH_SITE(irq, irq_disable); - PATCH_SITE(irq, irq_enable); - PATCH_SITE(irq, restore_fl); - PATCH_SITE(irq, save_fl); - PATCH_SITE(cpu, iret); - PATCH_SITE(mmu, read_cr2); - PATCH_SITE(mmu, read_cr3); - PATCH_SITE(mmu, write_cr3); -#endif -#if defined(CONFIG_PARAVIRT_SPINLOCKS) - case PARAVIRT_PATCH(lock.queued_spin_unlock): - if (pv_is_native_spin_unlock()) - return paravirt_patch_insns(ibuf, len, - start_lock_queued_spin_unlock, - end_lock_queued_spin_unlock); - break; - - case PARAVIRT_PATCH(lock.vcpu_is_preempted): - if (pv_is_native_vcpu_is_preempted()) - return paravirt_patch_insns(ibuf, len, - start_lock_vcpu_is_preempted, - end_lock_vcpu_is_preempted); - break; -#endif - - default: - break; - } -#undef PATCH_SITE - return paravirt_patch_default(type, ibuf, addr, len); -} diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c deleted file mode 100644 index 9d9e04b31077..000000000000 --- a/arch/x86/kernel/paravirt_patch_64.c +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <asm/paravirt.h> -#include <asm/asm-offsets.h> -#include <linux/stringify.h> - -#ifdef CONFIG_PARAVIRT_XXL -DEF_NATIVE(irq, irq_disable, "cli"); -DEF_NATIVE(irq, 
irq_enable, "sti"); -DEF_NATIVE(irq, restore_fl, "pushq %rdi; popfq"); -DEF_NATIVE(irq, save_fl, "pushfq; popq %rax"); -DEF_NATIVE(mmu, read_cr2, "movq %cr2, %rax"); -DEF_NATIVE(mmu, read_cr3, "movq %cr3, %rax"); -DEF_NATIVE(mmu, write_cr3, "movq %rdi, %cr3"); -DEF_NATIVE(cpu, wbinvd, "wbinvd"); - -DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq"); -DEF_NATIVE(cpu, swapgs, "swapgs"); -DEF_NATIVE(, mov64, "mov %rdi, %rax"); - -unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) -{ - return paravirt_patch_insns(insnbuf, len, - start__mov64, end__mov64); -} -#endif - -#if defined(CONFIG_PARAVIRT_SPINLOCKS) -DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)"); -DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax"); -#endif - -extern bool pv_is_native_spin_unlock(void); -extern bool pv_is_native_vcpu_is_preempted(void); - -unsigned native_patch(u8 type, void *ibuf, unsigned long addr, unsigned len) -{ -#define PATCH_SITE(ops, x) \ - case PARAVIRT_PATCH(ops.x): \ - return paravirt_patch_insns(ibuf, len, start_##ops##_##x, end_##ops##_##x) - - switch (type) { -#ifdef CONFIG_PARAVIRT_XXL - PATCH_SITE(irq, restore_fl); - PATCH_SITE(irq, save_fl); - PATCH_SITE(irq, irq_enable); - PATCH_SITE(irq, irq_disable); - PATCH_SITE(cpu, usergs_sysret64); - PATCH_SITE(cpu, swapgs); - PATCH_SITE(cpu, wbinvd); - PATCH_SITE(mmu, read_cr2); - PATCH_SITE(mmu, read_cr3); - PATCH_SITE(mmu, write_cr3); -#endif -#if defined(CONFIG_PARAVIRT_SPINLOCKS) - case PARAVIRT_PATCH(lock.queued_spin_unlock): - if (pv_is_native_spin_unlock()) - return paravirt_patch_insns(ibuf, len, - start_lock_queued_spin_unlock, - end_lock_queued_spin_unlock); - break; - - case PARAVIRT_PATCH(lock.vcpu_is_preempted): - if (pv_is_native_vcpu_is_preempted()) - return paravirt_patch_insns(ibuf, len, - start_lock_vcpu_is_preempted, - end_lock_vcpu_is_preempted); - break; -#endif - - default: - break; - } -#undef PATCH_SITE - return paravirt_patch_default(type, ibuf, addr, len); -} diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index dcd272dbd0a9..f62b498b18fb 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -70,7 +70,7 @@ void __init pci_iommu_alloc(void) } /* - * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel + * See <Documentation/x86/x86_64/boot-options.rst> for the iommu kernel * parameter documentation. 
*/ static __init int iommu_setup(char *p) diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 07c30ee17425..bb7e1132290b 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -74,6 +74,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) return regs_get_register(regs, pt_regs_offset[idx]); } +#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \ + ~((1ULL << PERF_REG_X86_MAX) - 1)) + #ifdef CONFIG_X86_32 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \ (1ULL << PERF_REG_X86_R9) | \ @@ -86,7 +89,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; @@ -112,7 +115,7 @@ void perf_get_regs_user(struct perf_regs *regs_user, int perf_reg_validate(u64 mask) { - if (!mask || (mask & REG_NOSUPPORT)) + if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))) return -EINVAL; return 0; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2399e910d109..b8ceec4974fe 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -62,27 +62,21 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; - unsigned long sp; - unsigned short ss, gs; + unsigned short gs; - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss; + if (user_mode(regs)) gs = get_user_gs(regs); - } else { - sp = kernel_stack_pointer(regs); - savesegment(ss, ss); + else savesegment(gs, gs); - } show_ip(regs, KERN_DEFAULT); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", - regs->si, regs->di, regs->bp, sp); + regs->si, regs->di, regs->bp, regs->sp); printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n", - (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags); + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, regs->ss, regs->flags); if (mode != SHOW_REGS_ALL) return; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a166c960bc9e..71691a8310e7 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -25,6 +25,7 @@ #include <linux/rcupdate.h> #include <linux/export.h> #include <linux/context_tracking.h> +#include <linux/nospec.h> #include <linux/uaccess.h> #include <asm/pgtable.h> @@ -154,35 +155,6 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -/* - * X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode - * when it traps. The previous stack will be directly underneath the saved - * registers, and 'sp/ss' won't even have been saved. Thus the '®s->sp'. - * - * Now, if the stack is empty, '®s->sp' is out of range. In this - * case we try to take the previous stack. To always return a non-null - * stack pointer we fall back to regs as stack if no previous stack - * exists. - * - * This is valid only for kernel mode traps. 
- */ -unsigned long kernel_stack_pointer(struct pt_regs *regs) -{ - unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); - unsigned long sp = (unsigned long)®s->sp; - u32 *prev_esp; - - if (context == (sp & ~(THREAD_SIZE - 1))) - return sp; - - prev_esp = (u32 *)(context); - if (*prev_esp) - return (unsigned long)*prev_esp; - - return (unsigned long)regs; -} -EXPORT_SYMBOL_GPL(kernel_stack_pointer); - static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); @@ -397,22 +369,12 @@ static int putreg(struct task_struct *child, case offsetof(struct user_regs_struct,fs_base): if (value >= TASK_SIZE_MAX) return -EIO; - /* - * When changing the FS base, use do_arch_prctl_64() - * to set the index to zero and to set the base - * as requested. - */ - if (child->thread.fsbase != value) - return do_arch_prctl_64(child, ARCH_SET_FS, value); + x86_fsbase_write_task(child, value); return 0; case offsetof(struct user_regs_struct,gs_base): - /* - * Exactly the same here as the %fs handling above. - */ if (value >= TASK_SIZE_MAX) return -EIO; - if (child->thread.gsbase != value) - return do_arch_prctl_64(child, ARCH_SET_GS, value); + x86_gsbase_write_task(child, value); return 0; #endif } @@ -645,7 +607,8 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) unsigned long val = 0; if (n < HBP_NUM) { - struct perf_event *bp = thread->ptrace_bps[n]; + int index = array_index_nospec(n, HBP_NUM); + struct perf_event *bp = thread->ptrace_bps[index]; if (bp) val = bp->hw.info.address; @@ -747,9 +710,6 @@ static int ioperm_get(struct task_struct *target, void ptrace_disable(struct task_struct *child) { user_disable_single_step(child); -#ifdef TIF_SYSCALL_EMU - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); -#endif } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -1361,18 +1321,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code, int si_code) +void send_sigtrap(struct pt_regs *regs, int error_code, int si_code) { + struct task_struct *tsk = current; + tsk->thread.trap_nr = X86_TRAP_DB; tsk->thread.error_code = error_code; /* Send us the fake SIGTRAP */ force_sig_fault(SIGTRAP, si_code, - user_mode(regs) ? (void __user *)regs->ip : NULL, tsk); + user_mode(regs) ? (void __user *)regs->ip : NULL); } void user_single_step_report(struct pt_regs *regs) { - send_sigtrap(current, regs, 0, TRAP_BRKPT); + send_sigtrap(regs, 0, TRAP_BRKPT); } diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 0ff3e294d0e5..10125358b9c4 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -3,6 +3,7 @@ */ +#include <linux/clocksource.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/notifier.h> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 08a5f4a131f5..bbe35bf879f5 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -453,15 +453,24 @@ static void __init memblock_x86_reserve_range_setup_data(void) #define CRASH_ALIGN SZ_16M /* - * Keep the crash kernel below this limit. On 32 bits earlier kernels - * would limit the kernel to the low 512 MiB due to mapping restrictions. + * Keep the crash kernel below this limit. + * + * On 32 bits earlier kernels would limit the kernel to the low 512 MiB + * due to mapping restrictions. 
+ * + * On 64bit, kdump kernel need be restricted to be under 64TB, which is + * the upper limit of system RAM in 4-level paing mode. Since the kdump + * jumping could be from 5-level to 4-level, the jumping will fail if + * kernel is put above 64TB, and there's no way to detect the paging mode + * of the kernel which will be loaded for dumping during the 1st kernel + * bootup. */ #ifdef CONFIG_X86_32 # define CRASH_ADDR_LOW_MAX SZ_512M # define CRASH_ADDR_HIGH_MAX SZ_512M #else # define CRASH_ADDR_LOW_MAX SZ_4G -# define CRASH_ADDR_HIGH_MAX MAXMEM +# define CRASH_ADDR_HIGH_MAX SZ_64T #endif static int __init reserve_crashkernel_low(void) @@ -827,8 +836,14 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) void __init setup_arch(char **cmdline_p) { + /* + * Reserve the memory occupied by the kernel between _text and + * __end_of_kernel_reserve symbols. Any kernel sections after the + * __end_of_kernel_reserve symbol must be explicitly reserved with a + * separate memblock_reserve() or they will be discarded. + */ memblock_reserve(__pa_symbol(_text), - (unsigned long)__bss_stop - (unsigned long)_text); + (unsigned long)__end_of_kernel_reserve - (unsigned long)_text); /* * Make sure page 0 is always reserved because on systems with diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 364813cea647..8eb7193e158d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -391,7 +391,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, put_user_ex(&frame->uc, &frame->puc); /* Create the ucontext. */ - if (boot_cpu_has(X86_FEATURE_XSAVE)) + if (static_cpu_has(X86_FEATURE_XSAVE)) put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); else put_user_ex(0, &frame->uc.uc_flags); @@ -857,7 +857,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) pr_cont("\n"); } - force_sig(SIGSEGV, me); + force_sig(SIGSEGV); } #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 4693e2f3a03e..96421f97e75c 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -144,7 +144,7 @@ void native_send_call_func_ipi(const struct cpumask *mask) } cpumask_copy(allbutself, cpu_online_mask); - cpumask_clear_cpu(smp_processor_id(), allbutself); + __cpumask_clear_cpu(smp_processor_id(), allbutself); if (cpumask_equal(mask, allbutself) && cpumask_equal(cpu_online_mask, cpu_callout_mask)) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 362dd8953f48..259d1d2be076 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -89,6 +89,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); +/* representing HT, core, and die siblings of each logical CPU */ +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); +EXPORT_PER_CPU_SYMBOL(cpu_die_map); + DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); /* Per CPU bogomips and other parameters */ @@ -99,6 +103,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; +static unsigned int logical_die __read_mostly; /* Maximum number of SMT threads on any online core */ int __read_mostly __max_smt_threads = 1; @@ -210,17 +215,11 @@ static void notrace start_secondary(void *unused) * before cpu_init(), SMP booting is too fragile that we want to * limit the things done here to the most necessary things. 
*/ - if (boot_cpu_has(X86_FEATURE_PCID)) - __write_cr4(__read_cr4() | X86_CR4_PCIDE); + cr4_init(); #ifdef CONFIG_X86_32 /* switch away from the initial page table */ load_cr3(swapper_pg_dir); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); __flush_tlb_all(); #endif load_current_idt(); @@ -300,6 +299,26 @@ int topology_phys_to_logical_pkg(unsigned int phys_pkg) return -1; } EXPORT_SYMBOL(topology_phys_to_logical_pkg); +/** + * topology_phys_to_logical_die - Map a physical die id to logical + * + * Returns logical die id or -1 if not found + */ +int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) +{ + int cpu; + int proc_id = cpu_data(cur_cpu).phys_proc_id; + + for_each_possible_cpu(cpu) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && c->cpu_die_id == die_id && + c->phys_proc_id == proc_id) + return c->logical_die_id; + } + return -1; +} +EXPORT_SYMBOL(topology_phys_to_logical_die); /** * topology_update_package_map - Update the physical to logical package map @@ -324,6 +343,29 @@ found: cpu_data(cpu).logical_proc_id = new; return 0; } +/** + * topology_update_die_map - Update the physical to logical die map + * @die: The die id as retrieved via CPUID + * @cpu: The cpu for which this is updated + */ +int topology_update_die_map(unsigned int die, unsigned int cpu) +{ + int new; + + /* Already available somewhere? */ + new = topology_phys_to_logical_die(die, cpu); + if (new >= 0) + goto found; + + new = logical_die++; + if (new != die) { + pr_info("CPU %u Converting physical %u to logical die %u\n", + cpu, die, new); + } +found: + cpu_data(cpu).logical_die_id = new; + return 0; +} void __init smp_store_boot_cpu_info(void) { @@ -333,6 +375,7 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; topology_update_package_map(c->phys_proc_id, id); + topology_update_die_map(c->cpu_die_id, id); c->initialized = true; } @@ -387,6 +430,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) int cpu1 = c->cpu_index, cpu2 = o->cpu_index; if (c->phys_proc_id == o->phys_proc_id && + c->cpu_die_id == o->cpu_die_id && per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) { if (c->cpu_core_id == o->cpu_core_id) return topology_sane(c, o, "smt"); @@ -398,6 +442,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) } } else if (c->phys_proc_id == o->phys_proc_id && + c->cpu_die_id == o->cpu_die_id && c->cpu_core_id == o->cpu_core_id) { return topology_sane(c, o, "smt"); } @@ -460,6 +505,15 @@ static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) return false; } +static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +{ + if ((c->phys_proc_id == o->phys_proc_id) && + (c->cpu_die_id == o->cpu_die_id)) + return true; + return false; +} + + #if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) static inline int x86_sched_itmt_flags(void) { @@ -522,6 +576,7 @@ void set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); + cpumask_set_cpu(cpu, topology_die_cpumask(cpu)); c->booted_cores = 1; return; } @@ -570,6 +625,9 @@ void set_cpu_sibling_map(int cpu) } if (match_pkg(c, o) && !topology_same_node(c, o)) x86_has_numa_in_package = true; + + if ((i == cpu) || (has_mp && match_die(c, o))) + link_mask(topology_die_cpumask, cpu, i); } threads = cpumask_weight(topology_sibling_cpumask(cpu)); 
@@ -1174,6 +1232,7 @@ static __init void disable_smp(void) physid_set_mask_of_physid(0, &phys_cpu_present_map); cpumask_set_cpu(0, topology_sibling_cpumask(0)); cpumask_set_cpu(0, topology_core_cpumask(0)); + cpumask_set_cpu(0, topology_die_cpumask(0)); } /* @@ -1269,6 +1328,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); } @@ -1489,6 +1549,8 @@ static void remove_siblinginfo(int cpu) cpu_data(sibling).booted_cores--; } + for_each_cpu(sibling, topology_die_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_die_cpumask(sibling)); for_each_cpu(sibling, topology_sibling_cpumask(cpu)) cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) @@ -1496,6 +1558,7 @@ static void remove_siblinginfo(int cpu) cpumask_clear(cpu_llc_shared_mask(cpu)); cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); + cpumask_clear(topology_die_cpumask(cpu)); c->cpu_core_id = 0; c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 2abf27d7df6b..4f36d3241faf 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -129,11 +129,9 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie, break; if ((unsigned long)fp < regs->sp) break; - if (frame.ret_addr) { - if (!consume_entry(cookie, frame.ret_addr, false)) - return; - } - if (fp == frame.next_fp) + if (!frame.ret_addr) + break; + if (!consume_entry(cookie, frame.ret_addr, false)) break; fp = frame.next_fp; } diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 0e14f6c0d35e..7ce29cee9f9e 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -37,8 +37,7 @@ unsigned long profile_pc(struct pt_regs *regs) #ifdef CONFIG_FRAME_POINTER return *(unsigned long *)(regs->bp + sizeof(long)); #else - unsigned long *sp = - (unsigned long *)kernel_stack_pointer(regs); + unsigned long *sp = (unsigned long *)regs->sp; /* * Return address is either directly at stack pointer * or above a saved flags. 
Eflags has bits 22-31 zero, @@ -82,8 +81,11 @@ static void __init setup_default_timer_irq(void) /* Default timer init function */ void __init hpet_time_init(void) { - if (!hpet_enable()) - setup_pit_timer(); + if (!hpet_enable()) { + if (!pit_timer_init()) + return; + } + setup_default_timer_irq(); } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index a5b802a12212..71d3fef1edc9 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -5,6 +5,7 @@ #include <linux/user.h> #include <linux/regset.h> #include <linux/syscalls.h> +#include <linux/nospec.h> #include <linux/uaccess.h> #include <asm/desc.h> @@ -220,6 +221,7 @@ int do_get_thread_area(struct task_struct *p, int idx, struct user_desc __user *u_info) { struct user_desc info; + int index; if (idx == -1 && get_user(idx, &u_info->entry_number)) return -EFAULT; @@ -227,8 +229,11 @@ int do_get_thread_area(struct task_struct *p, int idx, if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; - fill_user_desc(&info, idx, - &p->thread.tls_array[idx - GDT_ENTRY_TLS_MIN]); + index = idx - GDT_ENTRY_TLS_MIN; + index = array_index_nospec(index, + GDT_ENTRY_TLS_MAX - GDT_ENTRY_TLS_MIN + 1); + + fill_user_desc(&info, idx, &p->thread.tls_array[index]); if (copy_to_user(u_info, &info, sizeof(info))) return -EFAULT; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8b6d03e55d2f..87095a477154 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -254,9 +254,9 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, show_signal(tsk, signr, "trap ", str, regs, error_code); if (!sicode) - force_sig(signr, tsk); + force_sig(signr); else - force_sig_fault(signr, sicode, addr, tsk); + force_sig_fault(signr, sicode, addr); } NOKPROBE_SYMBOL(do_trap); @@ -566,7 +566,7 @@ do_general_protection(struct pt_regs *regs, long error_code) show_signal(tsk, SIGSEGV, "", desc, regs, error_code); - force_sig(SIGSEGV, tsk); + force_sig(SIGSEGV); } NOKPROBE_SYMBOL(do_general_protection); @@ -805,7 +805,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) } si_code = get_si_code(tsk->thread.debugreg6); if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) - send_sigtrap(tsk, regs, error_code, si_code); + send_sigtrap(regs, error_code, si_code); cond_local_irq_disable(regs); debug_stack_usage_dec(); @@ -856,7 +856,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) return; force_sig_fault(SIGFPE, si_code, - (void __user *)uprobe_get_trap_addr(regs), task); + (void __user *)uprobe_get_trap_addr(regs)); } dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 0b29e58f288e..57d87f79558f 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -59,7 +59,7 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) +__always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -76,7 +76,7 @@ void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -void __always_inline cyc2ns_read_end(void) +__always_inline void cyc2ns_read_end(void) { preempt_enable_notrace(); } @@ -632,31 +632,38 @@ unsigned long native_calibrate_tsc(void) crystal_khz = ecx_hz / 1000; - if (crystal_khz == 0) { - switch (boot_cpu_data.x86_model) { - case INTEL_FAM6_SKYLAKE_MOBILE: - case 
INTEL_FAM6_SKYLAKE_DESKTOP: - case INTEL_FAM6_KABYLAKE_MOBILE: - case INTEL_FAM6_KABYLAKE_DESKTOP: - crystal_khz = 24000; /* 24.0 MHz */ - break; - case INTEL_FAM6_ATOM_GOLDMONT_X: - crystal_khz = 25000; /* 25.0 MHz */ - break; - case INTEL_FAM6_ATOM_GOLDMONT: - crystal_khz = 19200; /* 19.2 MHz */ - break; - } - } + /* + * Denverton SoCs don't report crystal clock, and also don't support + * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal + * clock. + */ + if (crystal_khz == 0 && + boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X) + crystal_khz = 25000; - if (crystal_khz == 0) - return 0; /* - * TSC frequency determined by CPUID is a "hardware reported" + * TSC frequency reported directly by CPUID is a "hardware reported" * frequency and is the most accurate one so far we have. This * is considered a known frequency. */ - setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); + if (crystal_khz != 0) + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); + + /* + * Some Intel SoCs like Skylake and Kabylake don't report the crystal + * clock, but we can easily calculate it to a high degree of accuracy + * by considering the crystal ratio and the CPU speed. + */ + if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) { + unsigned int eax_base_mhz, ebx, ecx, edx; + + cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx); + crystal_khz = eax_base_mhz * 1000 * + eax_denominator / ebx_numerator; + } + + if (crystal_khz == 0) + return 0; /* * For Atom SoCs TSC is the only reliable clocksource. @@ -665,6 +672,16 @@ unsigned long native_calibrate_tsc(void) if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT) setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); +#ifdef CONFIG_X86_LOCAL_APIC + /* + * The local APIC appears to be fed by the core crystal clock + * (which sounds entirely sensible). We can set the global + * lapic_timer_period here to avoid having to calibrate the APIC + * timer later. + */ + lapic_timer_period = crystal_khz * 1000 / HZ; +#endif + return crystal_khz * ebx_numerator / eax_denominator; } diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 3d0e9aeea7c8..067858fe4db8 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -71,7 +71,7 @@ static const struct x86_cpu_id tsc_msr_cpu_ids[] = { /* * MSR-based CPU/TSC frequency discovery for certain CPUs. * - * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy + * Set global "lapic_timer_period" to bus_clock_cycles/jiffy * Return processor base frequency in KHz, or 0 on failure. 
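Back in native_calibrate_tsc() above, the CPUID.16H fallback is easiest to see with purely illustrative numbers: suppose CPUID.15H reports eax_denominator = 2 and ebx_numerator = 200 but leaves ecx_hz at 0, and CPUID.16H reports a 2400 MHz base frequency. Then:

	crystal_khz        = 2400 * 1000 * 2 / 200 = 24000	/* 24 MHz crystal */
	TSC frequency      = 24000 * 200 / 2       = 2400000	/* kHz, i.e. 2.4 GHz */
	lapic_timer_period = 24000 * 1000 / HZ     = 96000	/* assuming HZ=250 */

so the APIC timer period falls out of the same crystal-clock arithmetic, which is what the new CONFIG_X86_LOCAL_APIC hunk above takes advantage of.
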
*/ unsigned long cpu_khz_from_msr(void) @@ -104,7 +104,7 @@ unsigned long cpu_khz_from_msr(void) res = freq * ratio; #ifdef CONFIG_X86_LOCAL_APIC - lapic_timer_frequency = (freq * 1000) / HZ; + lapic_timer_period = (freq * 1000) / HZ; #endif /* diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index f8f3cfda01ae..5b345add550f 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -277,7 +277,7 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; tsk->thread.trap_nr = X86_TRAP_PF; - force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, tsk); + force_sig_fault(SIGSEGV, SEGV_MAPERR, addr); if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) return; diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 6106760de716..a224b5ab103f 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -70,15 +70,6 @@ static void unwind_dump(struct unwind_state *state) } } -static size_t regs_size(struct pt_regs *regs) -{ - /* x86_32 regs from kernel mode are two words shorter: */ - if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs)) - return sizeof(*regs) - 2*sizeof(long); - - return sizeof(*regs); -} - static bool in_entry_code(unsigned long ip) { char *addr = (char *)ip; @@ -198,12 +189,6 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp) } #endif -#ifdef CONFIG_X86_32 -#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long)) -#else -#define KERNEL_REGS_SIZE (sizeof(struct pt_regs)) -#endif - static bool update_stack_state(struct unwind_state *state, unsigned long *next_bp) { @@ -214,7 +199,7 @@ static bool update_stack_state(struct unwind_state *state, size_t len; if (state->regs) - prev_frame_end = (void *)state->regs + regs_size(state->regs); + prev_frame_end = (void *)state->regs + sizeof(*state->regs); else prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE; @@ -222,7 +207,7 @@ static bool update_stack_state(struct unwind_state *state, regs = decode_frame_pointer(next_bp); if (regs) { frame = (unsigned long *)regs; - len = KERNEL_REGS_SIZE; + len = sizeof(*regs); state->got_irq = true; } else { frame = next_bp; @@ -246,14 +231,6 @@ static bool update_stack_state(struct unwind_state *state, frame < prev_frame_end) return false; - /* - * On 32-bit with user mode regs, make sure the last two regs are safe - * to access: - */ - if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) && - !on_stack(info, frame, len + 2*sizeof(long))) - return false; - /* Move state to the next frame: */ if (regs) { state->regs = regs; @@ -412,10 +389,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, * Pretend that the frame is complete and that BP points to it, but save * the real BP so that we can use it when looking for the next frame. 
*/ - if (regs && regs->ip == 0 && - (unsigned long *)kernel_stack_pointer(regs) >= first_frame) { + if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) { state->next_bp = bp; - bp = ((unsigned long *)kernel_stack_pointer(regs)) - 1; + bp = ((unsigned long *)regs->sp) - 1; } /* Initialize stack info and make sure the frame data is accessible: */ diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 33b66b5c5aec..332ae6530fa8 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -82,9 +82,9 @@ static struct orc_entry *orc_find(unsigned long ip); * But they are copies of the ftrace entries that are static and * defined in ftrace_*.S, which do have orc entries. * - * If the undwinder comes across a ftrace trampoline, then find the + * If the unwinder comes across a ftrace trampoline, then find the * ftrace function that was used to create it, and use that ftrace - * function's orc entrie, as the placement of the return code in + * function's orc entry, as the placement of the return code in * the stack will be identical. */ static struct orc_entry *orc_ftrace_find(unsigned long ip) @@ -128,6 +128,16 @@ static struct orc_entry null_orc_entry = { .type = ORC_TYPE_CALL }; +/* Fake frame pointer entry -- used as a fallback for generated code */ +static struct orc_entry orc_fp_entry = { + .type = ORC_TYPE_CALL, + .sp_reg = ORC_REG_BP, + .sp_offset = 16, + .bp_reg = ORC_REG_PREV_SP, + .bp_offset = -16, + .end = 0, +}; + static struct orc_entry *orc_find(unsigned long ip) { static struct orc_entry *orc; @@ -392,8 +402,16 @@ bool unwind_next_frame(struct unwind_state *state) * calls and calls to noreturn functions. */ orc = orc_find(state->signal ? state->ip : state->ip - 1); - if (!orc) - goto err; + if (!orc) { + /* + * As a fallback, try to assume this code uses a frame pointer. + * This is useful for generated code, like BPF, which ORC + * doesn't know about. This is just a guess, so the rest of + * the unwind is no longer considered reliable. 
+ */ + orc = &orc_fp_entry; + state->error = true; + } /* End-of-stack check for kernel threads: */ if (orc->sp_reg == ORC_REG_UNDEFINED) { @@ -580,7 +598,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, goto done; state->ip = regs->ip; - state->sp = kernel_stack_pointer(regs); + state->sp = regs->sp; state->bp = regs->bp; state->regs = regs; state->full_regs = true; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 918b5092a85f..d8359ebeea70 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -1074,7 +1074,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n", current->pid, regs->sp, regs->ip); - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } return -1; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 6a38717d179c..a76c12b38e92 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -583,7 +583,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) return 1; /* we let this handle by the calling routine */ current->thread.trap_nr = trapno; current->thread.error_code = error_code; - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); return 0; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0850b5149345..e2feacf921a0 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -141,10 +141,10 @@ SECTIONS *(.text.__x86.indirect_thunk) __indirect_thunk_end = .; #endif - } :text = 0x9090 - /* End of text section */ - _etext = .; + /* End of text section */ + _etext = .; + } :text = 0x9090 NOTES :text :note @@ -368,6 +368,14 @@ SECTIONS __bss_stop = .; } + /* + * The memory occupied from _text to here, __end_of_kernel_reserve, is + * automatically reserved in setup_arch(). Anything after here must be + * explicitly reserved using memblock_reserve() or it will be discarded + * and treated as available memory. + */ + __end_of_kernel_reserve = .; + . = ALIGN(PAGE_SIZE); .brk : AT(ADDR(.brk) - LOAD_OFFSET) { __brk_base = .; @@ -379,10 +387,34 @@ SECTIONS . = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */ _end = .; +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* + * Early scratch/workarea section: Lives outside of the kernel proper + * (_text - _end). + * + * Resides after _end because even though the .brk section is after + * __end_of_kernel_reserve, the .brk section is later reserved as a + * part of the kernel. Since it is located after __end_of_kernel_reserve + * it will be discarded and become part of the available memory. As + * such, it can only be used by very early boot code and must not be + * needed afterwards. + * + * Currently used by SME for performing in-place encryption of the + * kernel during boot. Resides on a 2MB boundary to simplify the + * pagetable setup used for SME in-place encryption. + */ + . = ALIGN(HPAGE_SIZE); + .init.scratch : AT(ADDR(.init.scratch) - LOAD_OFFSET) { + __init_scratch_begin = .; + *(.init.scratch) + . 
= ALIGN(HPAGE_SIZE); + __init_scratch_end = .; + } +#endif + STABS_DEBUG DWARF_DEBUG - /* Sections to be discarded */ DISCARDS /DISCARD/ : { *(.eh_frame) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index fc042419e670..840e12583b85 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -41,6 +41,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_NO_POLL select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO select SRCU diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 4992e7c99588..ead681210306 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -134,6 +134,16 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { + best = kvm_find_cpuid_entry(vcpu, 0x1, 0); + if (best) { + if (vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT) + best->ecx |= F(MWAIT); + else + best->ecx &= ~F(MWAIT); + } + } + /* Update physical-address width */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); kvm_mmu_reset_context(vcpu); @@ -276,19 +286,38 @@ static void cpuid_mask(u32 *word, int wordnum) *word &= boot_cpu_data.x86_capability[wordnum]; } -static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, +static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function, u32 index) { entry->function = function; entry->index = index; + entry->flags = 0; + cpuid_count(entry->function, entry->index, &entry->eax, &entry->ebx, &entry->ecx, &entry->edx); - entry->flags = 0; + + switch (function) { + case 2: + entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; + break; + case 4: + case 7: + case 0xb: + case 0xd: + case 0x14: + case 0x8000001d: + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + break; + } } -static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, - u32 func, u32 index, int *nent, int maxnent) +static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry, + u32 func, int *nent, int maxnent) { + entry->function = func; + entry->index = 0; + entry->flags = 0; + switch (func) { case 0: entry->eax = 7; @@ -300,21 +329,83 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, break; case 7: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - if (index == 0) - entry->ecx = F(RDPID); + entry->eax = 0; + entry->ecx = F(RDPID); ++*nent; default: break; } - entry->function = func; - entry->index = index; - return 0; } -static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index, int *nent, int maxnent) +static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index) +{ + unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; + unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; + unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; + unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? 
F(INTEL_PT) : 0; + unsigned f_la57; + + /* cpuid 7.0.ebx */ + const u32 kvm_cpuid_7_0_ebx_x86_features = + F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | + F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | + F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) | + F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) | + F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt; + + /* cpuid 7.0.ecx*/ + const u32 kvm_cpuid_7_0_ecx_x86_features = + F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | + F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | + F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); + + /* cpuid 7.0.edx*/ + const u32 kvm_cpuid_7_0_edx_x86_features = + F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR); + + switch (index) { + case 0: + entry->eax = 0; + entry->ebx &= kvm_cpuid_7_0_ebx_x86_features; + cpuid_mask(&entry->ebx, CPUID_7_0_EBX); + /* TSC_ADJUST is emulated */ + entry->ebx |= F(TSC_ADJUST); + + entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; + f_la57 = entry->ecx & F(LA57); + cpuid_mask(&entry->ecx, CPUID_7_ECX); + /* Set LA57 based on hardware capability. */ + entry->ecx |= f_la57; + entry->ecx |= f_umip; + /* PKU is not yet implemented for shadow paging. */ + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) + entry->ecx &= ~F(PKU); + + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. + */ + entry->edx |= F(ARCH_CAPABILITIES); + break; + default: + WARN_ON_ONCE(1); + entry->eax = 0; + entry->ebx = 0; + entry->ecx = 0; + entry->edx = 0; + break; + } +} + +static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function, + int *nent, int maxnent) { int r; unsigned f_nx = is_efer_nx() ? F(NX) : 0; @@ -327,12 +418,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned f_lm = 0; #endif unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; - unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; - unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; - unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? 
F(INTEL_PT) : 0; - unsigned f_la57 = 0; /* cpuid 1.edx */ const u32 kvm_cpuid_1_edx_x86_features = @@ -377,7 +464,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | - F(AMD_SSB_NO) | F(AMD_STIBP); + F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -385,31 +472,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | F(PMM) | F(PMM_EN); - /* cpuid 7.0.ebx */ - const u32 kvm_cpuid_7_0_ebx_x86_features = - F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | - F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | - F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) | - F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) | - F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt; - /* cpuid 0xD.1.eax */ const u32 kvm_cpuid_D_1_eax_x86_features = F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; - /* cpuid 7.0.ecx*/ - const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | - F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | - F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); - - /* cpuid 7.0.edx*/ - const u32 kvm_cpuid_7_0_edx_x86_features = - F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | - F(MD_CLEAR); - /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -418,12 +484,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (*nent >= maxnent) goto out; - do_cpuid_1_ent(entry, function, index); + do_host_cpuid(entry, function, 0); ++*nent; switch (function) { case 0: - entry->eax = min(entry->eax, (u32)(f_intel_pt ? 0x14 : 0xd)); + /* Limited to the highest leaf implemented in KVM. */ + entry->eax = min(entry->eax, 0x1fU); break; case 1: entry->edx &= kvm_cpuid_1_edx_x86_features; @@ -441,14 +508,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, case 2: { int t, times = entry->eax & 0xff; - entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; for (t = 1; t < times; ++t) { if (*nent >= maxnent) goto out; - do_cpuid_1_ent(&entry[t], function, 0); - entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; + do_host_cpuid(&entry[t], function, 0); ++*nent; } break; @@ -458,7 +523,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, case 0x8000001d: { int i, cache_type; - entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until cache_type is zero */ for (i = 1; ; ++i) { if (*nent >= maxnent) @@ -467,9 +531,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, cache_type = entry[i - 1].eax & 0x1f; if (!cache_type) break; - do_cpuid_1_ent(&entry[i], function, i); - entry[i].flags |= - KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + do_host_cpuid(&entry[i], function, i); ++*nent; } break; @@ -480,36 +542,21 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx = 0; entry->edx = 0; break; + /* function 7 has additional index. 
*/ case 7: { - entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - /* Mask ebx against host capability word 9 */ - if (index == 0) { - entry->ebx &= kvm_cpuid_7_0_ebx_x86_features; - cpuid_mask(&entry->ebx, CPUID_7_0_EBX); - // TSC_ADJUST is emulated - entry->ebx |= F(TSC_ADJUST); - entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; - f_la57 = entry->ecx & F(LA57); - cpuid_mask(&entry->ecx, CPUID_7_ECX); - /* Set LA57 based on hardware capability. */ - entry->ecx |= f_la57; - entry->ecx |= f_umip; - /* PKU is not yet implemented for shadow paging. */ - if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) - entry->ecx &= ~F(PKU); - entry->edx &= kvm_cpuid_7_0_edx_x86_features; - cpuid_mask(&entry->edx, CPUID_7_EDX); - /* - * We emulate ARCH_CAPABILITIES in software even - * if the host doesn't support it. - */ - entry->edx |= F(ARCH_CAPABILITIES); - } else { - entry->ebx = 0; - entry->ecx = 0; - entry->edx = 0; + int i; + + for (i = 0; ; ) { + do_cpuid_7_mask(&entry[i], i); + if (i == entry->eax) + break; + if (*nent >= maxnent) + goto out; + + ++i; + do_host_cpuid(&entry[i], function, i); + ++*nent; } - entry->eax = 0; break; } case 9: @@ -543,11 +590,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->edx = edx.full; break; } - /* function 0xb has additional index. */ + /* + * Per Intel's SDM, the 0x1f is a superset of 0xb, + * thus they can be handled by common code. + */ + case 0x1f: case 0xb: { int i, level_type; - entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until level_type is zero */ for (i = 1; ; ++i) { if (*nent >= maxnent) @@ -556,9 +606,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, level_type = entry[i - 1].ecx & 0xff00; if (!level_type) break; - do_cpuid_1_ent(&entry[i], function, i); - entry[i].flags |= - KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + do_host_cpuid(&entry[i], function, i); ++*nent; } break; @@ -571,7 +619,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ebx = xstate_required_size(supported, false); entry->ecx = entry->ebx; entry->edx &= supported >> 32; - entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; if (!supported) break; @@ -580,7 +627,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (*nent >= maxnent) goto out; - do_cpuid_1_ent(&entry[i], function, idx); + do_host_cpuid(&entry[i], function, idx); if (idx == 1) { entry[i].eax &= kvm_cpuid_D_1_eax_x86_features; cpuid_mask(&entry[i].eax, CPUID_D_1_EAX); @@ -597,8 +644,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } entry[i].ecx = 0; entry[i].edx = 0; - entry[i].flags |= - KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; ++i; } @@ -611,12 +656,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, if (!f_intel_pt) break; - entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; for (t = 1; t <= times; ++t) { if (*nent >= maxnent) goto out; - do_cpuid_1_ent(&entry[t], function, t); - entry[t].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + do_host_cpuid(&entry[t], function, t); ++*nent; } break; @@ -640,7 +683,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_PV_UNHALT) | (1 << KVM_FEATURE_PV_TLB_FLUSH) | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | - (1 << KVM_FEATURE_PV_SEND_IPI); + (1 << KVM_FEATURE_PV_SEND_IPI) | + (1 << KVM_FEATURE_POLL_CONTROL) | + (1 << KVM_FEATURE_PV_SCHED_YIELD); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); 
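
[Editor's note] The hunk above adds KVM_FEATURE_POLL_CONTROL and KVM_FEATURE_PV_SCHED_YIELD to the bits advertised in the KVM_CPUID_FEATURES leaf. As a rough illustration of how a guest consumes this, the sketch below probes the leaf with plain CPUID. It assumes the bit positions from include/uapi/linux/kvm_para.h as of this series (12 and 13) and that the KVM leaves sit at the default base 0x40000000 (they can be relocated, e.g. to 0x40000100, when Hyper-V enlightenments are exposed), so treat it as an illustrative sketch rather than part of the patch itself.

/*
 * Hypothetical guest-side probe for the two paravirt feature bits added
 * in the hunk above. Build with gcc/clang on x86; __cpuid() comes from
 * the compiler's <cpuid.h>.
 */
#include <cpuid.h>
#include <stdio.h>
#include <string.h>

#define KVM_CPUID_SIGNATURE_BASE 0x40000000u   /* assumed default base */
#define KVM_CPUID_FEATURES_LEAF  0x40000001u
#define KVM_FEATURE_POLL_CONTROL   12          /* per uapi kvm_para.h */
#define KVM_FEATURE_PV_SCHED_YIELD 13

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[13] = { 0 };

	/* Check the hypervisor signature before trusting the leaf. */
	__cpuid(KVM_CPUID_SIGNATURE_BASE, eax, ebx, ecx, edx);
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	if (strncmp(sig, "KVMKVMKVM", 9) != 0)
		return 1;	/* not KVM, or base relocated */

	__cpuid(KVM_CPUID_FEATURES_LEAF, eax, ebx, ecx, edx);
	printf("POLL_CONTROL:   %s\n",
	       (eax >> KVM_FEATURE_POLL_CONTROL) & 1 ? "yes" : "no");
	printf("PV_SCHED_YIELD: %s\n",
	       (eax >> KVM_FEATURE_PV_SCHED_YIELD) & 1 ? "yes" : "no");
	return 0;
}

[End editor's note]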
@@ -730,21 +775,19 @@ out: return r; } -static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, - u32 idx, int *nent, int maxnent, unsigned int type) +static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func, + int *nent, int maxnent, unsigned int type) { if (type == KVM_GET_EMULATED_CPUID) - return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); + return __do_cpuid_func_emulated(entry, func, nent, maxnent); - return __do_cpuid_ent(entry, func, idx, nent, maxnent); + return __do_cpuid_func(entry, func, nent, maxnent); } #undef F struct kvm_cpuid_param { u32 func; - u32 idx; - bool has_leaf_count; bool (*qualifier)(const struct kvm_cpuid_param *param); }; @@ -788,11 +831,10 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, int limit, nent = 0, r = -E2BIG, i; u32 func; static const struct kvm_cpuid_param param[] = { - { .func = 0, .has_leaf_count = true }, - { .func = 0x80000000, .has_leaf_count = true }, - { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true }, + { .func = 0 }, + { .func = 0x80000000 }, + { .func = 0xC0000000, .qualifier = is_centaur_cpu }, { .func = KVM_CPUID_SIGNATURE }, - { .func = KVM_CPUID_FEATURES }, }; if (cpuid->nent < 1) @@ -816,19 +858,16 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, if (ent->qualifier && !ent->qualifier(ent)) continue; - r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, - &nent, cpuid->nent, type); + r = do_cpuid_func(&cpuid_entries[nent], ent->func, + &nent, cpuid->nent, type); if (r) goto out_free; - if (!ent->has_leaf_count) - continue; - limit = cpuid_entries[nent - 1].eax; for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) - r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, - &nent, cpuid->nent, type); + r = do_cpuid_func(&cpuid_entries[nent], func, + &nent, cpuid->nent, type); if (r) goto out_free; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 9a327d5b6d1f..d78a61408243 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -47,8 +47,6 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, - [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, - [CPUID_F_1_EDX] = { 0xf, 1, CPUID_EDX}, [CPUID_8000_0008_EBX] = {0x80000008, 0, CPUID_EBX}, [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4a387a235424..8e409ad448f9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4258,7 +4258,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt) ulong dr6; ctxt->ops->get_dr(ctxt, 6, &dr6); - dr6 &= ~15; + dr6 &= ~DR_TRAP_BITS; dr6 |= DR6_BD | DR6_RTM; ctxt->ops->set_dr(ctxt, 6, dr6); return emulate_db(ctxt); diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index d6519a3aa959..7c6233d37c64 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -102,7 +102,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm) return mode != KVM_IRQCHIP_NONE; } -bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 924b3bd5a7b7..8ecd48d31800 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -75,7 +75,7 @@ int 
kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, if (r < 0) r = 0; r += kvm_apic_set_irq(vcpu, irq, dest_map); - } else if (kvm_lapic_enabled(vcpu)) { + } else if (kvm_apic_sw_enabled(vcpu->arch.apic)) { if (!kvm_vector_hashing_enabled()) { if (!lowest) lowest = vcpu; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a21c440ff356..a232e76d8f23 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -69,6 +69,7 @@ #define X2APIC_BROADCAST 0xFFFFFFFFul #define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 +#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000 /* step-by-step approximation to mitigate fluctuation */ #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 @@ -85,11 +86,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector) apic_test_vector(vector, apic->regs + APIC_IRR); } -static inline void apic_clear_vector(int vec, void *bitmap) -{ - clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); -} - static inline int __apic_test_and_set_vector(int vec, void *bitmap) { return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -443,12 +439,12 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) if (unlikely(vcpu->arch.apicv_active)) { /* need to update RVI */ - apic_clear_vector(vec, apic->regs + APIC_IRR); + kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); kvm_x86_ops->hwapic_irr_update(vcpu, apic_find_highest_irr(apic)); } else { apic->irr_pending = false; - apic_clear_vector(vec, apic->regs + APIC_IRR); + kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); if (apic_search_irr(apic) != -1) apic->irr_pending = true; } @@ -1053,9 +1049,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) { if (trig_mode) - kvm_lapic_set_vector(vector, apic->regs + APIC_TMR); + kvm_lapic_set_vector(vector, + apic->regs + APIC_TMR); else - apic_clear_vector(vector, apic->regs + APIC_TMR); + kvm_lapic_clear_vector(vector, + apic->regs + APIC_TMR); } if (vcpu->arch.apicv_active) @@ -1313,21 +1311,45 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) return container_of(dev, struct kvm_lapic, dev); } +#define APIC_REG_MASK(reg) (1ull << ((reg) >> 4)) +#define APIC_REGS_MASK(first, count) \ + (APIC_REG_MASK(first) * ((1ull << (count)) - 1)) + int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, void *data) { unsigned char alignment = offset & 0xf; u32 result; /* this bitmask has a bit cleared for each reserved register */ - static const u64 rmask = 0x43ff01ffffffe70cULL; - - if ((alignment + len) > 4) { - apic_debug("KVM_APIC_READ: alignment error %x %d\n", - offset, len); - return 1; - } + u64 valid_reg_mask = + APIC_REG_MASK(APIC_ID) | + APIC_REG_MASK(APIC_LVR) | + APIC_REG_MASK(APIC_TASKPRI) | + APIC_REG_MASK(APIC_PROCPRI) | + APIC_REG_MASK(APIC_LDR) | + APIC_REG_MASK(APIC_DFR) | + APIC_REG_MASK(APIC_SPIV) | + APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) | + APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) | + APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) | + APIC_REG_MASK(APIC_ESR) | + APIC_REG_MASK(APIC_ICR) | + APIC_REG_MASK(APIC_ICR2) | + APIC_REG_MASK(APIC_LVTT) | + APIC_REG_MASK(APIC_LVTTHMR) | + APIC_REG_MASK(APIC_LVTPC) | + APIC_REG_MASK(APIC_LVT0) | + APIC_REG_MASK(APIC_LVT1) | + APIC_REG_MASK(APIC_LVTERR) | + APIC_REG_MASK(APIC_TMICT) | + APIC_REG_MASK(APIC_TMCCT) | + APIC_REG_MASK(APIC_TDCR); + + /* ARBPRI is not valid on x2APIC */ + if (!apic_x2apic_mode(apic)) + valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); - if (offset > 0x3f0 || !(rmask & (1ULL 
<< (offset >> 4)))) { + if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) { apic_debug("KVM_APIC_READ: read reserved register %x\n", offset); return 1; @@ -1499,11 +1521,40 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles) } } -void wait_lapic_expire(struct kvm_vcpu *vcpu) +static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, + s64 advance_expire_delta) { struct kvm_lapic *apic = vcpu->arch.apic; u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns; - u64 guest_tsc, tsc_deadline, ns; + u64 ns; + + /* too early */ + if (advance_expire_delta < 0) { + ns = -advance_expire_delta * 1000000ULL; + do_div(ns, vcpu->arch.virtual_tsc_khz); + timer_advance_ns -= min((u32)ns, + timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + } else { + /* too late */ + ns = advance_expire_delta * 1000000ULL; + do_div(ns, vcpu->arch.virtual_tsc_khz); + timer_advance_ns += min((u32)ns, + timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + } + + if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) + apic->lapic_timer.timer_advance_adjust_done = true; + if (unlikely(timer_advance_ns > 5000)) { + timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; + apic->lapic_timer.timer_advance_adjust_done = false; + } + apic->lapic_timer.timer_advance_ns = timer_advance_ns; +} + +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + u64 guest_tsc, tsc_deadline; if (apic->lapic_timer.expired_tscdeadline == 0) return; @@ -1514,34 +1565,15 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); + apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; if (guest_tsc < tsc_deadline) __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc); - if (!apic->lapic_timer.timer_advance_adjust_done) { - /* too early */ - if (guest_tsc < tsc_deadline) { - ns = (tsc_deadline - guest_tsc) * 1000000ULL; - do_div(ns, vcpu->arch.virtual_tsc_khz); - timer_advance_ns -= min((u32)ns, - timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); - } else { - /* too late */ - ns = (guest_tsc - tsc_deadline) * 1000000ULL; - do_div(ns, vcpu->arch.virtual_tsc_khz); - timer_advance_ns += min((u32)ns, - timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); - } - if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) - apic->lapic_timer.timer_advance_adjust_done = true; - if (unlikely(timer_advance_ns > 5000)) { - timer_advance_ns = 0; - apic->lapic_timer.timer_advance_adjust_done = true; - } - apic->lapic_timer.timer_advance_ns = timer_advance_ns; - } + if (unlikely(!apic->lapic_timer.timer_advance_adjust_done)) + adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta); } +EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire); static void start_sw_tscdeadline(struct kvm_lapic *apic) { @@ -2014,7 +2046,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, apic_debug("%s: offset 0x%x with length 0x%x, and value is " "0x%x\n", __func__, offset, len, val); - kvm_lapic_reg_write(apic, offset & 0xff0, val); + kvm_lapic_reg_write(apic, offset, val); return 0; } @@ -2311,7 +2343,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) HRTIMER_MODE_ABS_PINNED); apic->lapic_timer.timer.function = apic_timer_fn; if (timer_advance_ns == -1) { - apic->lapic_timer.timer_advance_ns = 
1000; + apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT; apic->lapic_timer.timer_advance_adjust_done = false; } else { apic->lapic_timer.timer_advance_ns = timer_advance_ns; @@ -2321,7 +2353,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) /* * APIC is created enabled. This will prevent kvm_lapic_set_base from - * thinking that APIC satet has changed. + * thinking that APIC state has changed. */ vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ @@ -2330,6 +2362,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) return 0; nomem_free_apic: kfree(apic); + vcpu->arch.apic = NULL; nomem: return -ENOMEM; } @@ -2339,7 +2372,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!apic_enabled(apic)) + if (!kvm_apic_hw_enabled(apic)) return -1; __apic_update_ppr(apic, &ppr); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index d6d049ba3045..36747174e4a8 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -32,6 +32,7 @@ struct kvm_timer { u64 tscdeadline; u64 expired_tscdeadline; u32 timer_advance_ns; + s64 advance_expire_delta; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; bool timer_advance_adjust_done; @@ -129,6 +130,11 @@ void kvm_lapic_exit(void); #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) +static inline void kvm_lapic_clear_vector(int vec, void *bitmap) +{ + clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); +} + static inline void kvm_lapic_set_vector(int vec, void *bitmap) { set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -219,7 +225,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); -void wait_lapic_expire(struct kvm_vcpu *vcpu); +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 98f6e4f88b04..9a5814d8d194 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -140,9 +140,6 @@ module_param(dbg, bool, 0644); #include <trace/events/kvm.h> -#define CREATE_TRACE_POINTS -#include "mmutrace.h" - #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) @@ -259,11 +256,20 @@ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; */ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; +/* + * The number of non-reserved physical address bits irrespective of features + * that repurpose legal bits, e.g. MKTME. + */ +static u8 __read_mostly shadow_phys_bits; static void mmu_spte_set(u64 *sptep, u64 spte); +static bool is_executable_pte(u64 spte); static union kvm_mmu_page_role kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); +#define CREATE_TRACE_POINTS +#include "mmutrace.h" + static inline bool kvm_available_flush_tlb_with_range(void) { @@ -468,6 +474,21 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); +static u8 kvm_get_shadow_phys_bits(void) +{ + /* + * boot_cpu_data.x86_phys_bits is reduced when MKTME is detected + * in CPU detection code, but MKTME treats those reduced bits as + * 'keyID' thus they are not reserved bits. Therefore for MKTME + * we should still return physical address bits reported by CPUID. 
+ */ + if (!boot_cpu_has(X86_FEATURE_TME) || + WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008)) + return boot_cpu_data.x86_phys_bits; + + return cpuid_eax(0x80000008) & 0xff; +} + static void kvm_mmu_reset_all_pte_masks(void) { u8 low_phys_bits; @@ -481,6 +502,8 @@ static void kvm_mmu_reset_all_pte_masks(void) shadow_present_mask = 0; shadow_acc_track_mask = 0; + shadow_phys_bits = kvm_get_shadow_phys_bits(); + /* * If the CPU has 46 or less physical address bits, then set an * appropriate mask to guard against L1TF attacks. Otherwise, it is @@ -650,7 +673,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) /* * The idea using the light way get the spte on x86_32 guest is from - * gup_get_pte(arch/x86/mm/gup.c). + * gup_get_pte (mm/gup.c). * * An spte tlb flush may be pending, because kvm_set_pte_rmapp * coalesces them and we are running out of the MMU lock. Therefore @@ -1073,10 +1096,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) { - if (sp->role.direct) - BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); - else + if (!sp->role.direct) { sp->gfns[index] = gfn; + return; + } + + if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index))) + pr_err_ratelimited("gfn mismatch under direct page %llx " + "(expected %llx, got %llx)\n", + sp->gfn, + kvm_mmu_page_get_gfn(sp, index), gfn); } /* @@ -3055,10 +3084,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, ret = RET_PF_EMULATE; pgprintk("%s: setting spte %llx\n", __func__, *sptep); - pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", - is_large_pte(*sptep)? "2MB" : "4kB", - *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn, - *sptep, sptep); + trace_kvm_mmu_set_spte(level, gfn, sptep); if (!was_rmapped && is_large_pte(*sptep)) ++vcpu->kvm->stat.lpages; @@ -3070,8 +3096,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, } } - kvm_release_pfn_clean(pfn); - return ret; } @@ -3106,9 +3130,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, if (ret <= 0) return -1; - for (i = 0; i < ret; i++, gfn++, start++) + for (i = 0; i < ret; i++, gfn++, start++) { mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, page_to_pfn(pages[i]), true, true); + put_page(pages[i]); + } return 0; } @@ -3156,40 +3182,40 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } -static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, - int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault) +static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, + int map_writable, int level, kvm_pfn_t pfn, + bool prefault) { - struct kvm_shadow_walk_iterator iterator; + struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; - int emulate = 0; - gfn_t pseudo_gfn; + int ret; + gfn_t gfn = gpa >> PAGE_SHIFT; + gfn_t base_gfn = gfn; if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return 0; + return RET_PF_RETRY; - for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { - if (iterator.level == level) { - emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, - write, level, gfn, pfn, prefault, - map_writable); - direct_pte_prefetch(vcpu, iterator.sptep); - ++vcpu->stat.pf_fixed; + trace_kvm_mmu_spte_requested(gpa, level, pfn); + for_each_shadow_entry(vcpu, gpa, it) { + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == level) break; - } - drop_large_spte(vcpu, 
iterator.sptep); - if (!is_shadow_present_pte(*iterator.sptep)) { - u64 base_addr = iterator.addr; + drop_large_spte(vcpu, it.sptep); + if (!is_shadow_present_pte(*it.sptep)) { + sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, + it.level - 1, true, ACC_ALL); - base_addr &= PT64_LVL_ADDR_MASK(iterator.level); - pseudo_gfn = base_addr >> PAGE_SHIFT; - sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, - iterator.level - 1, 1, ACC_ALL); - - link_shadow_page(vcpu, iterator.sptep, sp); + link_shadow_page(vcpu, it.sptep, sp); } } - return emulate; + + ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, + write, level, base_gfn, pfn, prefault, + map_writable); + direct_pte_prefetch(vcpu, it.sptep); + ++vcpu->stat.pf_fixed; + return ret; } static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) @@ -3216,11 +3242,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) } static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, - gfn_t *gfnp, kvm_pfn_t *pfnp, + gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) { kvm_pfn_t pfn = *pfnp; - gfn_t gfn = *gfnp; int level = *levelp; /* @@ -3247,8 +3272,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, mask = KVM_PAGES_PER_HPAGE(level) - 1; VM_BUG_ON((gfn & mask) != (pfn & mask)); if (pfn & mask) { - gfn &= ~mask; - *gfnp = gfn; kvm_release_pfn_clean(pfn); pfn &= ~mask; kvm_get_pfn(pfn); @@ -3505,22 +3528,19 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) return r; + r = RET_PF_RETRY; spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); - spin_unlock(&vcpu->kvm->mmu_lock); - - return r; - + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return RET_PF_RETRY; + return r; } static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, @@ -4015,19 +4035,6 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } -bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) -{ - if (unlikely(!lapic_in_kernel(vcpu) || - kvm_event_needs_reinjection(vcpu) || - vcpu->arch.exception.pending)) - return false; - - if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) - return false; - - return kvm_x86_ops->interrupt_allowed(vcpu); -} - static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable) { @@ -4147,22 +4154,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) return r; + r = RET_PF_RETRY; spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); - spin_unlock(&vcpu->kvm->mmu_lock); - - return r; - + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, gpa, write, map_writable, 
level, pfn, prefault); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return RET_PF_RETRY; + return r; } static void nonpaging_init_context(struct kvm_vcpu *vcpu, @@ -4494,7 +4498,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) */ shadow_zero_check = &context->shadow_zero_check; __reset_rsvds_bits_mask(vcpu, shadow_zero_check, - boot_cpu_data.x86_phys_bits, + shadow_phys_bits, context->shadow_root_level, uses_nx, guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES), is_pse(vcpu), true); @@ -4531,13 +4535,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(vcpu, shadow_zero_check, - boot_cpu_data.x86_phys_bits, + shadow_phys_bits, context->shadow_root_level, false, boot_cpu_has(X86_FEATURE_GBPAGES), true, true); else __reset_rsvds_bits_mask_ept(shadow_zero_check, - boot_cpu_data.x86_phys_bits, + shadow_phys_bits, false); if (!shadow_me_mask) @@ -4558,7 +4562,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context, bool execonly) { __reset_rsvds_bits_mask_ept(&context->shadow_zero_check, - boot_cpu_data.x86_phys_bits, execonly); + shadow_phys_bits, execonly); } #define BYTE_MASK(access) \ @@ -5935,7 +5939,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -5977,7 +5981,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) break; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return freed; } @@ -5999,6 +6003,34 @@ static void mmu_destroy_caches(void) kmem_cache_destroy(mmu_page_header_cache); } +static void kvm_set_mmio_spte_mask(void) +{ + u64 mask; + + /* + * Set the reserved bits and the present bit of an paging-structure + * entry to generate page fault with PFER.RSV = 1. + */ + + /* + * Mask the uppermost physical address bit, which would be reserved as + * long as the supported physical address width is less than 52. + */ + mask = 1ull << 51; + + /* Set the present bit. */ + mask |= 1ull; + + /* + * If reserved bit is not supported, clear the present bit to disable + * mmio page fault. + */ + if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52) + mask &= ~1ull; + + kvm_mmu_set_mmio_spte_mask(mask, mask); +} + int kvm_mmu_module_init(void) { int ret = -ENOMEM; @@ -6015,6 +6047,8 @@ int kvm_mmu_module_init(void) kvm_mmu_reset_all_pte_masks(); + kvm_set_mmio_spte_mask(); + pte_list_desc_cache = kmem_cache_create("pte_list_desc", sizeof(struct pte_list_desc), 0, SLAB_ACCOUNT, NULL); diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index dd30dccd2ad5..d8001b4bca05 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -301,6 +301,65 @@ TRACE_EVENT( __entry->kvm_gen == __entry->spte_gen ) ); + +TRACE_EVENT( + kvm_mmu_set_spte, + TP_PROTO(int level, gfn_t gfn, u64 *sptep), + TP_ARGS(level, gfn, sptep), + + TP_STRUCT__entry( + __field(u64, gfn) + __field(u64, spte) + __field(u64, sptep) + __field(u8, level) + /* These depend on page entry type, so compute them now. */ + __field(bool, r) + __field(bool, x) + __field(u8, u) + ), + + TP_fast_assign( + __entry->gfn = gfn; + __entry->spte = *sptep; + __entry->sptep = virt_to_phys(sptep); + __entry->level = level; + __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK); + __entry->x = is_executable_pte(__entry->spte); + __entry->u = shadow_user_mask ? 
!!(__entry->spte & shadow_user_mask) : -1; + ), + + TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx", + __entry->gfn, __entry->spte, + __entry->r ? "r" : "-", + __entry->spte & PT_WRITABLE_MASK ? "w" : "-", + __entry->x ? "x" : "-", + __entry->u == -1 ? "" : (__entry->u ? "u" : "-"), + __entry->level, __entry->sptep + ) +); + +TRACE_EVENT( + kvm_mmu_spte_requested, + TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn), + TP_ARGS(addr, level, pfn), + + TP_STRUCT__entry( + __field(u64, gfn) + __field(u64, pfn) + __field(u8, level) + ), + + TP_fast_assign( + __entry->gfn = addr >> PAGE_SHIFT; + __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); + __entry->level = level; + ), + + TP_printk("gfn %llx pfn %llx level %d", + __entry->gfn, __entry->pfn, __entry->level + ) +); + #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d583bcd119fc..7d5cdb3af594 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -540,6 +540,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); + kvm_release_pfn_clean(pfn); return true; } @@ -619,6 +620,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; int top_level, ret; + gfn_t base_gfn; direct_access = gw->pte_access; @@ -663,35 +665,34 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, link_shadow_page(vcpu, it.sptep, sp); } - for (; - shadow_walk_okay(&it) && it.level > hlevel; - shadow_walk_next(&it)) { - gfn_t direct_gfn; + base_gfn = gw->gfn; + + trace_kvm_mmu_spte_requested(addr, gw->level, pfn); + for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { clear_sp_write_flooding_count(it.sptep); + base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == hlevel) + break; + validate_direct_spte(vcpu, it.sptep, direct_access); drop_large_spte(vcpu, it.sptep); - if (is_shadow_present_pte(*it.sptep)) - continue; - - direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); - - sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, - true, direct_access); - link_shadow_page(vcpu, it.sptep, sp); + if (!is_shadow_present_pte(*it.sptep)) { + sp = kvm_mmu_get_page(vcpu, base_gfn, addr, + it.level - 1, true, direct_access); + link_shadow_page(vcpu, it.sptep, sp); + } } - clear_sp_write_flooding_count(it.sptep); ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, - it.level, gw->gfn, pfn, prefault, map_writable); + it.level, base_gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); - + ++vcpu->stat.pf_fixed; return ret; out_gpte_changed: - kvm_release_pfn_clean(pfn); return RET_PF_RETRY; } @@ -839,6 +840,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, walker.pte_access &= ~ACC_EXEC_MASK; } + r = RET_PF_RETRY; spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; @@ -847,19 +849,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (!force_pt_level) - transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); + transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, level, pfn, map_writable, prefault); - ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, 
AUDIT_POST_PAGE_FAULT); - spin_unlock(&vcpu->kvm->mmu_lock); - - return r; out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return RET_PF_RETRY; + return r; } static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 132d149494d6..aa5a2597305a 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -19,6 +19,9 @@ #include "lapic.h" #include "pmu.h" +/* This keeps the total size of the filter under 4k. */ +#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63 + /* NOTE: * - Each perf counter is defined as "struct kvm_pmc"; * - There are two types of perf counters: general purpose (gp) and fixed. @@ -141,6 +144,10 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) { unsigned config, type = PERF_TYPE_RAW; u8 event_select, unit_mask; + struct kvm *kvm = pmc->vcpu->kvm; + struct kvm_pmu_event_filter *filter; + int i; + bool allow_event = true; if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) printk_once("kvm pmu: pin control bit is ignored\n"); @@ -152,6 +159,22 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc)) return; + filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu); + if (filter) { + for (i = 0; i < filter->nevents; i++) + if (filter->events[i] == + (eventsel & AMD64_RAW_EVENT_MASK_NB)) + break; + if (filter->action == KVM_PMU_EVENT_ALLOW && + i == filter->nevents) + allow_event = false; + if (filter->action == KVM_PMU_EVENT_DENY && + i < filter->nevents) + allow_event = false; + } + if (!allow_event) + return; + event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; @@ -261,10 +284,10 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) ctr_val = rdtsc(); break; case VMWARE_BACKDOOR_PMC_REAL_TIME: - ctr_val = ktime_get_boot_ns(); + ctr_val = ktime_get_boottime_ns(); break; case VMWARE_BACKDOOR_PMC_APPARENT_TIME: - ctr_val = ktime_get_boot_ns() + + ctr_val = ktime_get_boottime_ns() + vcpu->kvm->arch.kvmclock_offset; break; default: @@ -348,3 +371,43 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu) { kvm_pmu_reset(vcpu); } + +int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) +{ + struct kvm_pmu_event_filter tmp, *filter; + size_t size; + int r; + + if (copy_from_user(&tmp, argp, sizeof(tmp))) + return -EFAULT; + + if (tmp.action != KVM_PMU_EVENT_ALLOW && + tmp.action != KVM_PMU_EVENT_DENY) + return -EINVAL; + + if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS) + return -E2BIG; + + size = struct_size(filter, events, tmp.nevents); + filter = kmalloc(size, GFP_KERNEL_ACCOUNT); + if (!filter) + return -ENOMEM; + + r = -EFAULT; + if (copy_from_user(filter, argp, size)) + goto cleanup; + + /* Ensure nevents can't be changed between the user copies. 
*/ + *filter = tmp; + + mutex_lock(&kvm->lock); + rcu_swap_protected(kvm->arch.pmu_event_filter, filter, + mutex_is_locked(&kvm->lock)); + mutex_unlock(&kvm->lock); + + synchronize_srcu_expedited(&kvm->srcu); + r = 0; +cleanup: + kfree(filter); + return r; +} diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 22dff661145a..58265f761c3b 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -118,6 +118,7 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu); void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_init(struct kvm_vcpu *vcpu); void kvm_pmu_destroy(struct kvm_vcpu *vcpu); +int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp); bool is_vmware_backdoor_pmc(u32 pmc_idx); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 48c865a4e5dd..583b9fa656f3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -364,6 +364,10 @@ static int avic; module_param(avic, int, S_IRUGO); #endif +/* enable/disable Next RIP Save */ +static int nrips = true; +module_param(nrips, int, 0444); + /* enable/disable Virtual VMLOAD VMSAVE */ static int vls = true; module_param(vls, int, 0444); @@ -770,7 +774,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm->vmcb->control.next_rip != 0) { + if (nrips && svm->vmcb->control.next_rip != 0) { WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } @@ -807,7 +811,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) kvm_deliver_exception_payload(&svm->vcpu); - if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { + if (nr == BP_VECTOR && !nrips) { unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); /* @@ -1364,6 +1368,11 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); + if (nrips) { + if (!boot_cpu_has(X86_FEATURE_NRIPS)) + nrips = false; + } + if (avic) { if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC) || @@ -3290,7 +3299,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) vmcb->control.exit_int_info_err, KVM_ISA_SVM); - rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map); + rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map); if (rc) { if (rc == -EINVAL) kvm_inject_gp(&svm->vcpu, 0); @@ -3580,7 +3589,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) vmcb_gpa = svm->vmcb->save.rax; - rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map); + rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map); if (rc) { if (rc == -EINVAL) kvm_inject_gp(&svm->vcpu, 0); @@ -3935,7 +3944,7 @@ static int rdpmc_interception(struct vcpu_svm *svm) { int err; - if (!static_cpu_has(X86_FEATURE_NRIPS)) + if (!nrips) return emulate_on_interception(svm); err = kvm_rdpmc(&svm->vcpu); @@ -5160,10 +5169,13 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); - if (avic_vcpu_is_running(vcpu)) - wrmsrl(SVM_AVIC_DOORBELL, - kvm_cpu_get_apicid(vcpu->cpu)); - else + if (avic_vcpu_is_running(vcpu)) { + int cpuid = vcpu->cpu; + + if (cpuid != get_cpu()) + wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid)); + put_cpu(); + } else kvm_vcpu_wake_up(vcpu); } @@ -5640,6 +5652,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) clgi(); kvm_load_guest_xcr0(vcpu); + if (lapic_in_kernel(vcpu) && + vcpu->arch.apic->lapic_timer.timer_advance_ns) + kvm_wait_lapic_expire(vcpu); + /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if * it's non-zero. 
Since vmentry is serialising on affected CPUs, there @@ -5861,9 +5877,9 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[2] = 0xd9; } -static void svm_check_processor_compat(void *rtn) +static int __init svm_check_processor_compat(void) { - *(int *)rtn = 0; + return 0; } static bool svm_cpu_has_accelerated_tpr(void) @@ -5875,6 +5891,7 @@ static bool svm_has_emulated_msr(int index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: return false; default: break; @@ -6162,15 +6179,9 @@ out: return ret; } -static void svm_handle_external_intr(struct kvm_vcpu *vcpu) +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) { - local_irq_enable(); - /* - * We must have an instruction with interrupts enabled, so - * the timer interrupt isn't delayed by the interrupt shadow. - */ - asm("nop"); - local_irq_disable(); + } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) @@ -7256,7 +7267,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .set_tdp_cr3 = set_tdp_cr3, .check_intercept = svm_check_intercept, - .handle_external_intr = svm_handle_external_intr, + .handle_exit_irqoff = svm_handle_exit_irqoff, .request_immediate_exit = __kvm_request_immediate_exit, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4d47a2631d1f..b5c831e79094 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1365,7 +1365,7 @@ TRACE_EVENT(kvm_hv_timer_state, __entry->vcpu_id = vcpu_id; __entry->hv_timer_in_use = hv_timer_in_use; ), - TP_printk("vcpu_id %x hv_timer %x\n", + TP_printk("vcpu_id %x hv_timer %x", __entry->vcpu_id, __entry->hv_timer_in_use) ); diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 5466c6d85cf3..72359709cdc1 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -3,6 +3,7 @@ #include <linux/errno.h> #include <linux/smp.h> +#include "../hyperv.h" #include "evmcs.h" #include "vmcs.h" #include "vmx.h" @@ -313,6 +314,23 @@ void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) } #endif +bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa) +{ + struct hv_vp_assist_page assist_page; + + *evmcs_gpa = -1ull; + + if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page))) + return false; + + if (unlikely(!assist_page.enlighten_vmentry)) + return false; + + *evmcs_gpa = assist_page.current_nested_vmcs; + + return true; +} + uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index e0fcef85b332..39a24eec8884 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -195,6 +195,7 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} static inline void evmcs_touch_msr_bitmap(void) {} #endif /* IS_ENABLED(CONFIG_HYPERV) */ +bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa); uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu); int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 5f9c1a200201..bb509c254939 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -41,15 +41,19 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR]; #define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP]) #define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP]) -static u16 shadow_read_only_fields[] = { -#define SHADOW_FIELD_RO(x) x, +struct shadow_vmcs_field { + u16 
encoding; + u16 offset; +}; +static struct shadow_vmcs_field shadow_read_only_fields[] = { +#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) }, #include "vmcs_shadow_fields.h" }; static int max_shadow_read_only_fields = ARRAY_SIZE(shadow_read_only_fields); -static u16 shadow_read_write_fields[] = { -#define SHADOW_FIELD_RW(x) x, +static struct shadow_vmcs_field shadow_read_write_fields[] = { +#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) }, #include "vmcs_shadow_fields.h" }; static int max_shadow_read_write_fields = @@ -63,34 +67,40 @@ static void init_vmcs_shadow_fields(void) memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE); for (i = j = 0; i < max_shadow_read_only_fields; i++) { - u16 field = shadow_read_only_fields[i]; + struct shadow_vmcs_field entry = shadow_read_only_fields[i]; + u16 field = entry.encoding; if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && (i + 1 == max_shadow_read_only_fields || - shadow_read_only_fields[i + 1] != field + 1)) + shadow_read_only_fields[i + 1].encoding != field + 1)) pr_err("Missing field from shadow_read_only_field %x\n", field + 1); clear_bit(field, vmx_vmread_bitmap); -#ifdef CONFIG_X86_64 if (field & 1) +#ifdef CONFIG_X86_64 continue; +#else + entry.offset += sizeof(u32); #endif - if (j < i) - shadow_read_only_fields[j] = field; - j++; + shadow_read_only_fields[j++] = entry; } max_shadow_read_only_fields = j; for (i = j = 0; i < max_shadow_read_write_fields; i++) { - u16 field = shadow_read_write_fields[i]; + struct shadow_vmcs_field entry = shadow_read_write_fields[i]; + u16 field = entry.encoding; if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 && (i + 1 == max_shadow_read_write_fields || - shadow_read_write_fields[i + 1] != field + 1)) + shadow_read_write_fields[i + 1].encoding != field + 1)) pr_err("Missing field from shadow_read_write_field %x\n", field + 1); + WARN_ONCE(field >= GUEST_ES_AR_BYTES && + field <= GUEST_TR_AR_BYTES, + "Update vmcs12_write_any() to drop reserved bits from AR_BYTES"); + /* * PML and the preemption timer can be emulated, but the * processor cannot vmwrite to fields that don't exist @@ -115,13 +125,13 @@ static void init_vmcs_shadow_fields(void) clear_bit(field, vmx_vmwrite_bitmap); clear_bit(field, vmx_vmread_bitmap); -#ifdef CONFIG_X86_64 if (field & 1) +#ifdef CONFIG_X86_64 continue; +#else + entry.offset += sizeof(u32); #endif - if (j < i) - shadow_read_write_fields[j] = field; - j++; + shadow_read_write_fields[j++] = entry; } max_shadow_read_write_fields = j; } @@ -182,7 +192,7 @@ static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) { - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); + secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, -1ull); } @@ -238,22 +248,41 @@ static void free_nested(struct kvm_vcpu *vcpu) free_loaded_vmcs(&vmx->nested.vmcs02); } +static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, + struct loaded_vmcs *prev) +{ + struct vmcs_host_state *dest, *src; + + if (unlikely(!vmx->guest_state_loaded)) + return; + + src = &prev->host_state; + dest = &vmx->loaded_vmcs->host_state; + + vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base); + dest->ldt_sel = src->ldt_sel; +#ifdef CONFIG_X86_64 + dest->ds_sel = src->ds_sel; + dest->es_sel = src->es_sel; +#endif +} + static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) { struct vcpu_vmx *vmx = to_vmx(vcpu); + struct 
loaded_vmcs *prev; int cpu; if (vmx->loaded_vmcs == vmcs) return; cpu = get_cpu(); - vmx_vcpu_put(vcpu); + prev = vmx->loaded_vmcs; vmx->loaded_vmcs = vmcs; - vmx_vcpu_load(vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu); + vmx_sync_vmcs_host_state(vmx, prev); put_cpu(); - vm_entry_controls_reset_shadow(vmx); - vm_exit_controls_reset_shadow(vmx); vmx_segment_cache_clear(vmx); } @@ -930,8 +959,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne * If PAE paging and EPT are both on, CR3 is not used by the CPU and * must not be dereferenced. */ - if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) && - !nested_ept) { + if (is_pae_paging(vcpu) && !nested_ept) { if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { *entry_failure_code = ENTRY_FAIL_PDPTE; return -EINVAL; @@ -1105,14 +1133,6 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) vmx->nested.msrs.misc_low = data; vmx->nested.msrs.misc_high = data >> 32; - /* - * If L1 has read-only VM-exit information fields, use the - * less permissive vmx_vmwrite_bitmap to specify write - * permissions for the shadow VMCS. - */ - if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) - vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); - return 0; } @@ -1214,6 +1234,11 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_IA32_VMX_VMCS_ENUM: vmx->nested.msrs.vmcs_enum = data; return 0; + case MSR_IA32_VMX_VMFUNC: + if (data & ~vmx->nested.msrs.vmfunc_controls) + return -EINVAL; + vmx->nested.msrs.vmfunc_controls = data; + return 0; default: /* * The rest of the VMX capability MSRs do not support restore. @@ -1301,41 +1326,29 @@ int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata) } /* - * Copy the writable VMCS shadow fields back to the VMCS12, in case - * they have been modified by the L1 guest. Note that the "read-only" - * VM-exit information fields are actually writable if the vCPU is - * configured to support "VMWRITE to any supported field in the VMCS." + * Copy the writable VMCS shadow fields back to the VMCS12, in case they have + * been modified by the L1 guest. Note, "writable" in this context means + * "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of + * fields tagged SHADOW_FIELD_RO may or may not align with the "read-only" + * VM-exit information fields (which are actually writable if the vCPU is + * configured to support "VMWRITE to any supported field in the VMCS"). */ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) { - const u16 *fields[] = { - shadow_read_write_fields, - shadow_read_only_fields - }; - const int max_fields[] = { - max_shadow_read_write_fields, - max_shadow_read_only_fields - }; - int i, q; - unsigned long field; - u64 field_value; struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; + struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu); + struct shadow_vmcs_field field; + unsigned long val; + int i; preempt_disable(); vmcs_load(shadow_vmcs); - for (q = 0; q < ARRAY_SIZE(fields); q++) { - for (i = 0; i < max_fields[q]; i++) { - field = fields[q][i]; - field_value = __vmcs_readl(field); - vmcs12_write_any(get_vmcs12(&vmx->vcpu), field, field_value); - } - /* - * Skip the VM-exit information fields if they are read-only. 
- */ - if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu)) - break; + for (i = 0; i < max_shadow_read_write_fields; i++) { + field = shadow_read_write_fields[i]; + val = __vmcs_readl(field.encoding); + vmcs12_write_any(vmcs12, field.encoding, field.offset, val); } vmcs_clear(shadow_vmcs); @@ -1346,7 +1359,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) { - const u16 *fields[] = { + const struct shadow_vmcs_field *fields[] = { shadow_read_write_fields, shadow_read_only_fields }; @@ -1354,18 +1367,20 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) max_shadow_read_write_fields, max_shadow_read_only_fields }; - int i, q; - unsigned long field; - u64 field_value = 0; struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; + struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu); + struct shadow_vmcs_field field; + unsigned long val; + int i, q; vmcs_load(shadow_vmcs); for (q = 0; q < ARRAY_SIZE(fields); q++) { for (i = 0; i < max_fields[q]; i++) { field = fields[q][i]; - vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value); - __vmcs_writel(field, field_value); + val = vmcs12_read_any(vmcs12, field.encoding, + field.offset); + __vmcs_writel(field.encoding, val); } } @@ -1623,7 +1638,7 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx) * evmcs->host_gdtr_base = vmcs12->host_gdtr_base; * evmcs->host_idtr_base = vmcs12->host_idtr_base; * evmcs->host_rsp = vmcs12->host_rsp; - * sync_vmcs12() doesn't read these: + * sync_vmcs02_to_vmcs12() doesn't read these: * evmcs->io_bitmap_a = vmcs12->io_bitmap_a; * evmcs->io_bitmap_b = vmcs12->io_bitmap_b; * evmcs->msr_bitmap = vmcs12->msr_bitmap; @@ -1768,26 +1783,22 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, bool from_launch) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct hv_vp_assist_page assist_page; + bool evmcs_gpa_changed = false; + u64 evmcs_gpa; if (likely(!vmx->nested.enlightened_vmcs_enabled)) return 1; - if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page))) + if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) return 1; - if (unlikely(!assist_page.enlighten_vmentry)) - return 1; - - if (unlikely(assist_page.current_nested_vmcs != - vmx->nested.hv_evmcs_vmptr)) { - + if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { if (!vmx->nested.hv_evmcs) vmx->nested.current_vmptr = -1ull; nested_release_evmcs(vcpu); - if (kvm_vcpu_map(vcpu, gpa_to_gfn(assist_page.current_nested_vmcs), + if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa), &vmx->nested.hv_evmcs_map)) return 0; @@ -1822,15 +1833,9 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, } vmx->nested.dirty_vmcs12 = true; - /* - * As we keep L2 state for one guest only 'hv_clean_fields' mask - * can't be used when we switch between them. Reset it here for - * simplicity. - */ - vmx->nested.hv_evmcs->hv_clean_fields &= - ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; - vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs; + vmx->nested.hv_evmcs_vmptr = evmcs_gpa; + evmcs_gpa_changed = true; /* * Unlike normal vmcs12, enlightened vmcs12 is not fully * reloaded from guest's memory (read only fields, fields not @@ -1844,10 +1849,19 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, } } + + /* + * Clean fields data can't de used on VMLAUNCH and when we switch + * between different L2 guests as KVM keeps a single VMCS12 per L1. 
+ */ + if (from_launch || evmcs_gpa_changed) + vmx->nested.hv_evmcs->hv_clean_fields &= + ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + return 1; } -void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu) +void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -1868,7 +1882,7 @@ void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu) copy_vmcs12_to_shadow(vmx); } - vmx->nested.need_vmcs12_sync = false; + vmx->nested.need_vmcs12_to_shadow_sync = false; } static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) @@ -1948,8 +1962,20 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) if (cpu_has_vmx_msr_bitmap()) vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); - if (enable_pml) + /* + * The PML address never changes, so it is constant in vmcs02. + * Conceptually we want to copy the PML index from vmcs01 here, + * and then back to vmcs01 on nested vmexit. But since we flush + * the log and reset GUEST_PML_INDEX on each vmexit, the PML + * index is also effectively constant in vmcs02. + */ + if (enable_pml) { vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); + vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + } + + if (cpu_has_vmx_encls_vmexit()) + vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); /* * Set the MSR load/store lists to match L0's settings. Only the @@ -1963,7 +1989,7 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) vmx_set_constant_host_state(vmx); } -static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx, +static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) { prepare_vmcs02_constant_state(vmx); @@ -1984,17 +2010,14 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) - prepare_vmcs02_early_full(vmx, vmcs12); + prepare_vmcs02_early_rare(vmx, vmcs12); /* * PIN CONTROLS */ - exec_control = vmcs12->pin_based_vm_exec_control; - - /* Preemption timer setting is computed directly in vmx_vcpu_run. */ - exec_control |= vmcs_config.pin_based_exec_ctrl; - exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; - vmx->loaded_vmcs->hv_timer_armed = false; + exec_control = vmx_pin_based_exec_ctrl(vmx); + exec_control |= (vmcs12->pin_based_vm_exec_control & + ~PIN_BASED_VMX_PREEMPTION_TIMER); /* Posted interrupts setting is only taken from vmcs12. */ if (nested_cpu_has_posted_intr(vmcs12)) { @@ -2003,7 +2026,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) } else { exec_control &= ~PIN_BASED_POSTED_INTR; } - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); + pin_controls_set(vmx, exec_control); /* * EXEC CONTROLS @@ -2014,28 +2037,31 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) exec_control &= ~CPU_BASED_TPR_SHADOW; exec_control |= vmcs12->cpu_based_vm_exec_control; - /* - * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if - * nested_get_vmcs12_pages can't fix it up, the illegal value - * will result in a VM entry failure. - */ - if (exec_control & CPU_BASED_TPR_SHADOW) { - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); + if (exec_control & CPU_BASED_TPR_SHADOW) vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); - } else { #ifdef CONFIG_X86_64 + else exec_control |= CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING; #endif - } /* * A vmexit (to either L1 hypervisor or L0 userspace) is always needed * for I/O port accesses. 
*/ - exec_control &= ~CPU_BASED_USE_IO_BITMAPS; exec_control |= CPU_BASED_UNCOND_IO_EXITING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); + exec_control &= ~CPU_BASED_USE_IO_BITMAPS; + + /* + * This bit will be computed in nested_get_vmcs12_pages, because + * we do not have access to L1's MSR bitmap yet. For now, keep + * the same bit as before, hoping to avoid multiple VMWRITEs that + * only set/clear this bit. + */ + exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; + exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS; + + exec_controls_set(vmx, exec_control); /* * SECONDARY EXEC CONTROLS @@ -2061,22 +2087,19 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) /* VMCS shadowing for L2 is emulated for now */ exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS; - if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) - vmcs_write16(GUEST_INTR_STATUS, - vmcs12->guest_intr_status); - /* - * Write an illegal value to APIC_ACCESS_ADDR. Later, - * nested_get_vmcs12_pages will either fix it up or - * remove the VM execution control. + * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4() + * will not have to rewrite the controls just for this bit. */ - if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) - vmcs_write64(APIC_ACCESS_ADDR, -1ull); + if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() && + (vmcs12->guest_cr4 & X86_CR4_UMIP)) + exec_control |= SECONDARY_EXEC_DESC; - if (exec_control & SECONDARY_EXEC_ENCLS_EXITING) - vmcs_write64(ENCLS_EXITING_BITMAP, -1ull); + if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) + vmcs_write16(GUEST_INTR_STATUS, + vmcs12->guest_intr_status); - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); + secondary_exec_controls_set(vmx, exec_control); } /* @@ -2095,7 +2118,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) if (guest_efer != host_efer) exec_control |= VM_ENTRY_LOAD_IA32_EFER; } - vm_entry_controls_init(vmx, exec_control); + vm_entry_controls_set(vmx, exec_control); /* * EXIT CONTROLS @@ -2107,17 +2130,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) exec_control = vmx_vmexit_ctrl(); if (cpu_has_load_ia32_efer() && guest_efer != host_efer) exec_control |= VM_EXIT_LOAD_IA32_EFER; - vm_exit_controls_init(vmx, exec_control); - - /* - * Conceptually we want to copy the PML address and index from - * vmcs01 here, and then back to vmcs01 on nested vmexit. But, - * since we always flush the log on each vmexit and never change - * the PML address (once set), this happens to be equivalent to - * simply resetting the index in vmcs02. 
- */ - if (enable_pml) - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + vm_exit_controls_set(vmx, exec_control); /* * Interrupt/Exception Fields @@ -2138,7 +2151,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) } } -static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) +static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) { struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; @@ -2162,6 +2175,8 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); + vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); + vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); @@ -2198,6 +2213,10 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); } + + if (kvm_mpx_supported() && vmx->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); } if (nested_cpu_has_xsaves(vmcs12)) @@ -2233,14 +2252,6 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); set_cr4_guest_host_mask(vmx); - - if (kvm_mpx_supported()) { - if (vmx->nested.nested_run_pending && - (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); - else - vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); - } } /* @@ -2259,20 +2270,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, { struct vcpu_vmx *vmx = to_vmx(vcpu); struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; + bool load_guest_pdptrs_vmcs12 = false; - if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) { - prepare_vmcs02_full(vmx, vmcs12); + if (vmx->nested.dirty_vmcs12 || hv_evmcs) { + prepare_vmcs02_rare(vmx, vmcs12); vmx->nested.dirty_vmcs12 = false; - } - /* - * First, the fields that are shadowed. This must be kept in sync - * with vmcs_shadow_fields.h. - */ - if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & - HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) { - vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); - vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); + load_guest_pdptrs_vmcs12 = !hv_evmcs || + !(hv_evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1); } if (vmx->nested.nested_run_pending && @@ -2283,6 +2289,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_set_dr(vcpu, 7, vcpu->arch.dr7); vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); } + if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending || + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) + vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); vmx_set_rflags(vcpu, vmcs12->guest_rflags); /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the @@ -2372,6 +2381,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, entry_failure_code)) return -EINVAL; + /* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. 
*/ + if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) && + is_pae_paging(vcpu)) { + vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); + vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); + vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); + vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); + } + if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; @@ -2609,6 +2627,30 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, !kvm_pat_valid(vmcs12->host_ia32_pat)) return -EINVAL; + ia32e = (vmcs12->vm_exit_controls & + VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; + + if (vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || + vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || + vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || + vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || + vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || + vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || + vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK) || + vmcs12->host_cs_selector == 0 || + vmcs12->host_tr_selector == 0 || + (vmcs12->host_ss_selector == 0 && !ia32e)) + return -EINVAL; + +#ifdef CONFIG_X86_64 + if (is_noncanonical_address(vmcs12->host_fs_base, vcpu) || + is_noncanonical_address(vmcs12->host_gs_base, vcpu) || + is_noncanonical_address(vmcs12->host_gdtr_base, vcpu) || + is_noncanonical_address(vmcs12->host_idtr_base, vcpu) || + is_noncanonical_address(vmcs12->host_tr_base, vcpu)) + return -EINVAL; +#endif + /* * If the load IA32_EFER VM-exit control is 1, bits reserved in the * IA32_EFER MSR must be 0 in the field for that register. In addition, @@ -2616,8 +2658,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, * the host address-space size VM-exit control. */ if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { - ia32e = (vmcs12->vm_exit_controls & - VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) @@ -2781,7 +2821,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) [launched]"i"(offsetof(struct loaded_vmcs, launched)), [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), [wordsize]"i"(sizeof(ulong)) - : "cc", "memory" + : "memory" ); if (vmx->msr_autoload.host.nr) @@ -2851,18 +2891,14 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) hpa = page_to_phys(vmx->nested.apic_access_page); vmcs_write64(APIC_ACCESS_ADDR, hpa); } else { - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); + secondary_exec_controls_clearbit(vmx, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); } } if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { map = &vmx->nested.virtual_apic_map; - /* - * If translation failed, VM entry will fail because - * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. - */ if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) { vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn)); } else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) && @@ -2876,11 +2912,13 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) * _not_ what the processor does but it's basically the * only possibility we have. 
*/ - vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, - CPU_BASED_TPR_SHADOW); + exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW); } else { - printk("bad virtual-APIC page address\n"); - dump_vmcs(); + /* + * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to + * force VM-Entry to fail. + */ + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); } } @@ -2896,11 +2934,9 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) } } if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12)) - vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, - CPU_BASED_USE_MSR_BITMAPS); + exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS); else - vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, - CPU_BASED_USE_MSR_BITMAPS); + exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS); } /* @@ -2953,7 +2989,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) u32 exit_reason = EXIT_REASON_INVALID_STATE; u32 exit_qual; - evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & + evaluate_pending_interrupts = exec_controls_get(vmx) & (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); @@ -2964,6 +3000,25 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); + /* + * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and* + * nested early checks are disabled. In the event of a "late" VM-Fail, + * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its + * software model to the pre-VMEntry host state. When EPT is disabled, + * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes + * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing + * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to + * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested + * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is + * guaranteed to be overwritten with a shadow CR3 prior to re-entering + * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as + * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks + * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail + * path would need to manually save/restore vmcs01.GUEST_CR3. + */ + if (!enable_ept && !nested_early_check) + vmcs_writel(GUEST_CR3, vcpu->arch.cr3); + vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); prepare_vmcs02_early(vmx, vmcs12); @@ -3059,7 +3114,7 @@ vmentry_fail_vmexit: vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; vmcs12->exit_qualification = exit_qual; if (enable_shadow_vmcs || vmx->nested.hv_evmcs) - vmx->nested.need_vmcs12_sync = true; + vmx->nested.need_vmcs12_to_shadow_sync = true; return 1; } @@ -3077,7 +3132,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (!nested_vmx_check_permission(vcpu)) return 1; - if (!nested_vmx_handle_enlightened_vmptrld(vcpu, true)) + if (!nested_vmx_handle_enlightened_vmptrld(vcpu, launch)) return 1; if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull) @@ -3393,20 +3448,57 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; } -/* - * Update the guest state fields of vmcs12 to reflect changes that - * occurred while L2 was running. 
(The "IA-32e mode guest" bit of the - * VM-entry controls is also updated, since this is really a guest - * state bit.) - */ -static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) -{ - vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); - vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); +static bool is_vmcs12_ext_field(unsigned long field) +{ + switch (field) { + case GUEST_ES_SELECTOR: + case GUEST_CS_SELECTOR: + case GUEST_SS_SELECTOR: + case GUEST_DS_SELECTOR: + case GUEST_FS_SELECTOR: + case GUEST_GS_SELECTOR: + case GUEST_LDTR_SELECTOR: + case GUEST_TR_SELECTOR: + case GUEST_ES_LIMIT: + case GUEST_CS_LIMIT: + case GUEST_SS_LIMIT: + case GUEST_DS_LIMIT: + case GUEST_FS_LIMIT: + case GUEST_GS_LIMIT: + case GUEST_LDTR_LIMIT: + case GUEST_TR_LIMIT: + case GUEST_GDTR_LIMIT: + case GUEST_IDTR_LIMIT: + case GUEST_ES_AR_BYTES: + case GUEST_DS_AR_BYTES: + case GUEST_FS_AR_BYTES: + case GUEST_GS_AR_BYTES: + case GUEST_LDTR_AR_BYTES: + case GUEST_TR_AR_BYTES: + case GUEST_ES_BASE: + case GUEST_CS_BASE: + case GUEST_SS_BASE: + case GUEST_DS_BASE: + case GUEST_FS_BASE: + case GUEST_GS_BASE: + case GUEST_LDTR_BASE: + case GUEST_TR_BASE: + case GUEST_GDTR_BASE: + case GUEST_IDTR_BASE: + case GUEST_PENDING_DBG_EXCEPTIONS: + case GUEST_BNDCFGS: + return true; + default: + break; + } - vmcs12->guest_rsp = kvm_rsp_read(vcpu); - vmcs12->guest_rip = kvm_rip_read(vcpu); - vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); + return false; +} + +static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); @@ -3427,8 +3519,6 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT); vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT); vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES); - vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); - vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES); vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES); vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES); @@ -3444,11 +3534,69 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE); vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); + vmcs12->guest_pending_dbg_exceptions = + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + if (kvm_mpx_supported()) + vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); + + vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false; +} + +static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + int cpu; + + if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare) + return; + + + WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01); + + cpu = get_cpu(); + vmx->loaded_vmcs = &vmx->nested.vmcs02; + vmx_vcpu_load(&vmx->vcpu, cpu); + + sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + + vmx->loaded_vmcs = &vmx->vmcs01; + vmx_vcpu_load(&vmx->vcpu, cpu); + put_cpu(); +} + +/* + * Update the guest state fields of vmcs12 to reflect changes that + * occurred while L2 was running. (The "IA-32e mode guest" bit of the + * VM-entry controls is also updated, since this is really a guest + * state bit.) 
+ */ +static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (vmx->nested.hv_evmcs) + sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + + vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs; + + vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); + vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); + + vmcs12->guest_rsp = kvm_rsp_read(vcpu); + vmcs12->guest_rip = kvm_rip_read(vcpu); + vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); + + vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); + vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); + + vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); + vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); + vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); vmcs12->guest_interruptibility_info = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - vmcs12->guest_pending_dbg_exceptions = - vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; else @@ -3469,10 +3617,12 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) */ if (enable_ept) { vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); - vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); - vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); - vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); - vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); + if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) { + vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); + vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); + vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); + vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3); + } } vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); @@ -3484,22 +3634,11 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); - if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) { + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); - vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - } - /* TODO: These cannot have changed unless we have MSR bitmaps and - * the relevant bit asks not to trap the change */ - if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) - vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) vmcs12->guest_ia32_efer = vcpu->arch.efer; - vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); - vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); - vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); - if (kvm_mpx_supported()) - vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); } /* @@ -3517,11 +3656,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, u32 exit_reason, u32 exit_intr_info, unsigned long exit_qualification) { - /* update guest state fields: */ - sync_vmcs12(vcpu, vmcs12); - /* update exit information fields: */ - vmcs12->vm_exit_reason = exit_reason; vmcs12->exit_qualification = exit_qualification; vmcs12->vm_exit_intr_info = exit_intr_info; @@ -3775,18 +3910,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); nested_ept_uninit_mmu_context(vcpu); - - /* - * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3 - * points to 
shadow pages! Fortunately we only get here after a WARN_ON - * if EPT is disabled, so a VMabort is perfectly fine. - */ - if (enable_ept) { - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); - } else { - nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED); - } + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); /* * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs @@ -3794,7 +3919,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) * VMFail, like everything else we just need to ensure our * software model is up-to-date. */ - ept_save_pdptrs(vcpu); + if (enable_ept) + ept_save_pdptrs(vcpu); kvm_mmu_reset_context(vcpu); @@ -3882,14 +4008,14 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vcpu->arch.tsc_offset -= vmcs12->tsc_offset; if (likely(!vmx->fail)) { - if (exit_reason == -1) - sync_vmcs12(vcpu, vmcs12); - else + sync_vmcs02_to_vmcs12(vcpu, vmcs12); + + if (exit_reason != -1) prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, exit_qualification); /* - * Must happen outside of sync_vmcs12() as it will + * Must happen outside of sync_vmcs02_to_vmcs12() as it will * also be used to capture vmcs12 cache as part of * capturing nVMX state for snapshot (migration). * @@ -3945,7 +4071,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs)) - vmx->nested.need_vmcs12_sync = true; + vmx->nested.need_vmcs12_to_shadow_sync = true; /* in case we halted in L2 */ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -4008,7 +4134,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, * #UD or #GP. 
*/ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, - u32 vmx_instruction_info, bool wr, gva_t *ret) + u32 vmx_instruction_info, bool wr, int len, gva_t *ret) { gva_t off; bool exn; @@ -4115,7 +4241,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, */ if (!(s.base == 0 && s.limit == 0xffffffff && ((s.type & 8) || !(s.type & 4)))) - exn = exn || (off + sizeof(u64) > s.limit); + exn = exn || ((u64)off + len - 1 > s.limit); } if (exn) { kvm_queue_exception_e(vcpu, @@ -4134,7 +4260,8 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) struct x86_exception e; if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva)) + vmcs_read32(VMX_INSTRUCTION_INFO), false, + sizeof(*vmpointer), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) { @@ -4300,11 +4427,13 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) if (vmx->nested.current_vmptr == -1ull) return; + copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu)); + if (enable_shadow_vmcs) { /* copy to memory all shadowed fields in case they were modified */ copy_shadow_to_vmcs12(vmx); - vmx->nested.need_vmcs12_sync = false; + vmx->nested.need_vmcs12_to_shadow_sync = false; vmx_disable_shadow_vmcs(vmx); } vmx->nested.posted_intr_nv = -1; @@ -4334,6 +4463,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); u32 zero = 0; gpa_t vmptr; + u64 evmcs_gpa; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -4349,10 +4479,18 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); - if (vmx->nested.hv_evmcs_map.hva) { - if (vmptr == vmx->nested.hv_evmcs_vmptr) - nested_release_evmcs(vcpu); - } else { + /* + * When Enlightened VMEntry is enabled on the calling CPU we treat + * memory area pointer by vmptr as Enlightened VMCS (as there's no good + * way to distinguish it from VMCS12) and we must not corrupt it by + * writing to the non-existent 'launch_state' field. The area doesn't + * have to be the currently active EVMCS on the calling CPU and there's + * nothing KVM has to do to transition it from 'active' to 'non-active' + * state. It is possible that the area will stay mapped as + * vmx->nested.hv_evmcs but this shouldn't be a problem. 
+ */ + if (likely(!vmx->nested.enlightened_vmcs_enabled || + !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) { if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vcpu); @@ -4386,8 +4524,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) u64 field_value; unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); + int len; gva_t gva = 0; struct vmcs12 *vmcs12; + short offset; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -4409,11 +4549,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu) /* Decode instruction info and find the field to read */ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); - /* Read the field, zero-extended to a u64 field_value */ - if (vmcs12_read_any(vmcs12, field, &field_value) < 0) + + offset = vmcs_field_to_offset(field); + if (offset < 0) return nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + + /* Read the field, zero-extended to a u64 field_value */ + field_value = vmcs12_read_any(vmcs12, field, offset); + /* * Now copy part of this value to register or memory, as requested. * Note that the number of bits actually copied is 32 or 64 depending @@ -4423,21 +4570,45 @@ static int handle_vmread(struct kvm_vcpu *vcpu) kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), field_value); } else { + len = is_64_bit_mode(vcpu) ? 8 : 4; if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, true, &gva)) + vmx_instruction_info, true, len, &gva)) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ - kvm_write_guest_virt_system(vcpu, gva, &field_value, - (is_long_mode(vcpu) ? 8 : 4), NULL); + kvm_write_guest_virt_system(vcpu, gva, &field_value, len, NULL); } return nested_vmx_succeed(vcpu); } +static bool is_shadow_field_rw(unsigned long field) +{ + switch (field) { +#define SHADOW_FIELD_RW(x, y) case x: +#include "vmcs_shadow_fields.h" + return true; + default: + break; + } + return false; +} + +static bool is_shadow_field_ro(unsigned long field) +{ + switch (field) { +#define SHADOW_FIELD_RO(x, y) case x: +#include "vmcs_shadow_fields.h" + return true; + default: + break; + } + return false; +} static int handle_vmwrite(struct kvm_vcpu *vcpu) { unsigned long field; + int len; gva_t gva; struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); @@ -4452,6 +4623,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) u64 field_value = 0; struct x86_exception e; struct vmcs12 *vmcs12; + short offset; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -4463,11 +4635,11 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) field_value = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 3) & 0xf)); else { + len = is_64_bit_mode(vcpu) ? 8 : 4; if (get_vmx_mem_address(vcpu, exit_qualification, - vmx_instruction_info, false, &gva)) + vmx_instruction_info, false, len, &gva)) return 1; - if (kvm_read_guest_virt(vcpu, gva, &field_value, - (is_64_bit_mode(vcpu) ? 
8 : 4), &e)) { + if (kvm_read_guest_virt(vcpu, gva, &field_value, len, &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } @@ -4484,9 +4656,16 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); - if (!is_guest_mode(vcpu)) + if (!is_guest_mode(vcpu)) { vmcs12 = get_vmcs12(vcpu); - else { + + /* + * Ensure vmcs12 is up-to-date before any VMWRITE that dirties + * vmcs12, else we may crush a field or consume a stale value. + */ + if (!is_shadow_field_rw(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + } else { /* * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE * to shadowed-field sets the ALU flags for VMfailInvalid. @@ -4496,28 +4675,46 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) vmcs12 = get_shadow_vmcs12(vcpu); } - if (vmcs12_write_any(vmcs12, field, field_value) < 0) + offset = vmcs_field_to_offset(field); + if (offset < 0) return nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); /* - * Do not track vmcs12 dirty-state if in guest-mode - * as we actually dirty shadow vmcs12 instead of vmcs12. + * Some Intel CPUs intentionally drop the reserved bits of the AR byte + * fields on VMWRITE. Emulate this behavior to ensure consistent KVM + * behavior regardless of the underlying hardware, e.g. if an AR_BYTE + * field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD + * from L1 will return a different value than VMREAD from L2 (L1 sees + * the stripped down value, L2 sees the full value as stored by KVM). */ - if (!is_guest_mode(vcpu)) { - switch (field) { -#define SHADOW_FIELD_RW(x) case x: -#include "vmcs_shadow_fields.h" - /* - * The fields that can be updated by L1 without a vmexit are - * always updated in the vmcs02, the others go down the slow - * path of prepare_vmcs02. - */ - break; - default: - vmx->nested.dirty_vmcs12 = true; - break; + if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES) + field_value &= 0x1f0ff; + + vmcs12_write_any(vmcs12, field, offset, field_value); + + /* + * Do not track vmcs12 dirty-state if in guest-mode as we actually + * dirty shadow vmcs12 instead of vmcs12. Fields that can be updated + * by L1 without a vmexit are always updated in the vmcs02, i.e. don't + * "dirty" vmcs12, all others go down the prepare_vmcs02() slow path. + */ + if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) { + /* + * L1 can read these fields without exiting, ensure the + * shadow VMCS is up-to-date. 
+ */ + if (enable_shadow_vmcs && is_shadow_field_ro(field)) { + preempt_disable(); + vmcs_load(vmx->vmcs01.shadow_vmcs); + + __vmcs_writel(field, field_value); + + vmcs_clear(vmx->vmcs01.shadow_vmcs); + vmcs_load(vmx->loaded_vmcs->vmcs); + preempt_enable(); } + vmx->nested.dirty_vmcs12 = true; } return nested_vmx_succeed(vcpu); @@ -4527,11 +4724,10 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) { vmx->nested.current_vmptr = vmptr; if (enable_shadow_vmcs) { - vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_SHADOW_VMCS); + secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, __pa(vmx->vmcs01.shadow_vmcs)); - vmx->nested.need_vmcs12_sync = true; + vmx->nested.need_vmcs12_to_shadow_sync = true; } vmx->nested.dirty_vmcs12 = true; } @@ -4615,7 +4811,8 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) if (unlikely(to_vmx(vcpu)->nested.hv_evmcs)) return 1; - if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva)) + if (get_vmx_mem_address(vcpu, exit_qual, instr_info, + true, sizeof(gpa_t), &gva)) return 1; /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, @@ -4661,7 +4858,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) * operand is read even if it isn't needed (e.g., for type==global) */ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, false, &gva)) + vmx_instruction_info, false, sizeof(operand), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { kvm_inject_page_fault(vcpu, &e); @@ -4670,13 +4867,11 @@ static int handle_invept(struct kvm_vcpu *vcpu) switch (type) { case VMX_EPT_EXTENT_GLOBAL: + case VMX_EPT_EXTENT_CONTEXT: /* - * TODO: track mappings and invalidate - * single context requests appropriately + * TODO: Sync the necessary shadow EPT roots here, rather than + * at the next emulated VM-entry. */ - case VMX_EPT_EXTENT_CONTEXT: - kvm_mmu_sync_roots(vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); break; default: BUG_ON(1); @@ -4723,7 +4918,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) * operand is read even if it isn't needed (e.g., for type==global) */ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, false, &gva)) + vmx_instruction_info, false, sizeof(operand), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { kvm_inject_page_fault(vcpu, &e); @@ -5240,9 +5435,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, vmx = to_vmx(vcpu); vmcs12 = get_vmcs12(vcpu); - if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled) - kvm_state.flags |= KVM_STATE_NESTED_EVMCS; - if (nested_vmx_allowed(vcpu) && (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr; @@ -5251,6 +5443,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (vmx_has_valid_vmcs12(vcpu)) { kvm_state.size += sizeof(user_vmx_nested_state->vmcs12); + if (vmx->nested.hv_evmcs) + kvm_state.flags |= KVM_STATE_NESTED_EVMCS; + if (is_guest_mode(vcpu) && nested_cpu_has_shadow_vmcs(vmcs12) && vmcs12->vmcs_link_pointer != -1ull) @@ -5284,12 +5479,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, * When running L2, the authoritative vmcs12 state is in the * vmcs02. 
When running L1, the authoritative vmcs12 state is * in the shadow or enlightened vmcs linked to vmcs01, unless - * need_vmcs12_sync is set, in which case, the authoritative + * need_vmcs12_to_shadow_sync is set, in which case, the authoritative * vmcs12 state is in the vmcs12 already. */ if (is_guest_mode(vcpu)) { - sync_vmcs12(vcpu, vmcs12); - } else if (!vmx->nested.need_vmcs12_sync) { + sync_vmcs02_to_vmcs12(vcpu, vmcs12); + sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + } else if (!vmx->nested.need_vmcs12_to_shadow_sync) { if (vmx->nested.hv_evmcs) copy_enlightened_to_vmcs12(vmx); else if (enable_shadow_vmcs) @@ -5350,6 +5546,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) return -EINVAL; + /* + * KVM_STATE_NESTED_EVMCS used to signal that KVM should + * enable eVMCS capability on vCPU. However, since then + * code was changed such that flag signals vmcs12 should + * be copied into eVMCS in guest memory. + * + * To preserve backwards compatability, allow user + * to set this flag even when there is no VMXON region. + */ if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS) return -EINVAL; } else { @@ -5358,7 +5563,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa)) return -EINVAL; - } + } if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) @@ -5373,20 +5578,21 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags * must be zero. */ - if (is_smm(vcpu) ? kvm_state->flags : kvm_state->hdr.vmx.smm.flags) + if (is_smm(vcpu) ? + (kvm_state->flags & + (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING)) + : kvm_state->hdr.vmx.smm.flags) return -EINVAL; if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && !(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON)) return -EINVAL; - vmx_leave_nested(vcpu); - if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { - if (!nested_vmx_allowed(vcpu)) + if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) && + (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled)) return -EINVAL; - nested_enable_evmcs(vcpu, NULL); - } + vmx_leave_nested(vcpu); if (kvm_state->hdr.vmx.vmxon_pa == -1ull) return 0; @@ -5411,7 +5617,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, * Sync eVMCS upon entry as we may not have * HV_X64_MSR_VP_ASSIST_PAGE set up yet. */ - vmx->nested.need_vmcs12_sync = true; + vmx->nested.need_vmcs12_to_shadow_sync = true; } else { return -EINVAL; } @@ -5479,14 +5685,8 @@ error_guest_mode: void nested_vmx_vcpu_setup(void) { if (enable_shadow_vmcs) { - /* - * At vCPU creation, "VMWRITE to any supported field - * in the VMCS" is supported, so use the more - * permissive vmx_vmread_bitmap to specify both read - * and write permissions for the shadow VMCS. 
- */ vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap)); - vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap)); + vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap)); } } @@ -5616,10 +5816,15 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps, msrs->secondary_ctls_low = 0; msrs->secondary_ctls_high &= SECONDARY_EXEC_DESC | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + SECONDARY_EXEC_WBINVD_EXITING | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_WBINVD_EXITING; + SECONDARY_EXEC_RDRAND_EXITING | + SECONDARY_EXEC_ENABLE_INVPCID | + SECONDARY_EXEC_RDSEED_EXITING | + SECONDARY_EXEC_XSAVES; /* * We can emulate "VMCS shadowing," even if the hardware @@ -5739,14 +5944,6 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) { int i; - /* - * Without EPT it is not possible to restore L1's CR3 and PDPTR on - * VMfail, because they are not available in vmcs01. Just always - * use hardware checks. - */ - if (!enable_ept) - nested_early_check = 1; - if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) { diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index e847ff1019a2..187d39bf0bf1 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -17,11 +17,11 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry); bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason); void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, u32 exit_intr_info, unsigned long exit_qualification); -void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu); +void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu); int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata); int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, - u32 vmx_instruction_info, bool wr, gva_t *ret); + u32 vmx_instruction_info, bool wr, int len, gva_t *ret); static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index b8e50f76fefc..2200fb698dd0 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h @@ -146,7 +146,6 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value) __vmcs_writel(field, value); #ifndef CONFIG_X86_64 - asm volatile (""); __vmcs_writel(field+1, value >> 32); #endif } diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index cb6079f8a227..481ad879197b 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -42,6 +42,14 @@ struct vmcs_host_state { #endif }; +struct vmcs_controls_shadow { + u32 vm_entry; + u32 vm_exit; + u32 pin; + u32 exec; + u32 secondary_exec; +}; + /* * Track a VMCS that may be loaded on a certain CPU. 
If it is (cpu!=-1), also * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs @@ -53,7 +61,7 @@ struct loaded_vmcs { int cpu; bool launched; bool nmi_known_unmasked; - bool hv_timer_armed; + bool hv_timer_soft_disabled; /* Support for vnmi-less CPUs */ int soft_vnmi_blocked; ktime_t entry_time; @@ -61,6 +69,7 @@ struct loaded_vmcs { unsigned long *msr_bitmap; struct list_head loaded_vmcss_on_cpu_link; struct vmcs_host_state host_state; + struct vmcs_controls_shadow controls_shadow; }; static inline bool is_exception_n(u32 intr_info, u8 vector) @@ -115,6 +124,12 @@ static inline bool is_nmi(u32 intr_info) == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK); } +static inline bool is_external_intr(u32 intr_info) +{ + return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) + == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR); +} + enum vmcs_field_width { VMCS_FIELD_WIDTH_U16 = 0, VMCS_FIELD_WIDTH_U64 = 1, diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 337718fc8a36..d0c6df373f67 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -395,69 +395,48 @@ static inline short vmcs_field_to_offset(unsigned long field) #undef ROL16 -/* - * Read a vmcs12 field. Since these can have varying lengths and we return - * one type, we chose the biggest type (u64) and zero-extend the return value - * to that size. Note that the caller, handle_vmread, might need to use only - * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of - * 64-bit fields are to be returned). - */ -static inline int vmcs12_read_any(struct vmcs12 *vmcs12, - unsigned long field, u64 *ret) +static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field, + u16 offset) { - short offset = vmcs_field_to_offset(field); - char *p; - - if (offset < 0) - return offset; - - p = (char *)vmcs12 + offset; + char *p = (char *)vmcs12 + offset; switch (vmcs_field_width(field)) { case VMCS_FIELD_WIDTH_NATURAL_WIDTH: - *ret = *((natural_width *)p); - return 0; + return *((natural_width *)p); case VMCS_FIELD_WIDTH_U16: - *ret = *((u16 *)p); - return 0; + return *((u16 *)p); case VMCS_FIELD_WIDTH_U32: - *ret = *((u32 *)p); - return 0; + return *((u32 *)p); case VMCS_FIELD_WIDTH_U64: - *ret = *((u64 *)p); - return 0; + return *((u64 *)p); default: - WARN_ON(1); - return -ENOENT; + WARN_ON_ONCE(1); + return -1; } } -static inline int vmcs12_write_any(struct vmcs12 *vmcs12, - unsigned long field, u64 field_value){ - short offset = vmcs_field_to_offset(field); +static inline void vmcs12_write_any(struct vmcs12 *vmcs12, unsigned long field, + u16 offset, u64 field_value) +{ char *p = (char *)vmcs12 + offset; - if (offset < 0) - return offset; - switch (vmcs_field_width(field)) { case VMCS_FIELD_WIDTH_U16: *(u16 *)p = field_value; - return 0; + break; case VMCS_FIELD_WIDTH_U32: *(u32 *)p = field_value; - return 0; + break; case VMCS_FIELD_WIDTH_U64: *(u64 *)p = field_value; - return 0; + break; case VMCS_FIELD_WIDTH_NATURAL_WIDTH: *(natural_width *)p = field_value; - return 0; + break; default: - WARN_ON(1); - return -ENOENT; + WARN_ON_ONCE(1); + break; } - } #endif /* __KVM_X86_VMX_VMCS12_H */ diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h index 132432f375c2..eb1ecd16fd22 100644 --- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h +++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h @@ -1,8 +1,12 @@ +#if !defined(SHADOW_FIELD_RO) && !defined(SHADOW_FIELD_RW) +BUILD_BUG_ON(1) +#endif + #ifndef SHADOW_FIELD_RO -#define 
SHADOW_FIELD_RO(x) +#define SHADOW_FIELD_RO(x, y) #endif #ifndef SHADOW_FIELD_RW -#define SHADOW_FIELD_RW(x) +#define SHADOW_FIELD_RW(x, y) #endif /* @@ -28,47 +32,48 @@ */ /* 16-bits */ -SHADOW_FIELD_RW(GUEST_INTR_STATUS) -SHADOW_FIELD_RW(GUEST_PML_INDEX) -SHADOW_FIELD_RW(HOST_FS_SELECTOR) -SHADOW_FIELD_RW(HOST_GS_SELECTOR) +SHADOW_FIELD_RW(GUEST_INTR_STATUS, guest_intr_status) +SHADOW_FIELD_RW(GUEST_PML_INDEX, guest_pml_index) +SHADOW_FIELD_RW(HOST_FS_SELECTOR, host_fs_selector) +SHADOW_FIELD_RW(HOST_GS_SELECTOR, host_gs_selector) /* 32-bits */ -SHADOW_FIELD_RO(VM_EXIT_REASON) -SHADOW_FIELD_RO(VM_EXIT_INTR_INFO) -SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN) -SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD) -SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE) -SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE) -SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL) -SHADOW_FIELD_RW(EXCEPTION_BITMAP) -SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE) -SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD) -SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN) -SHADOW_FIELD_RW(TPR_THRESHOLD) -SHADOW_FIELD_RW(GUEST_CS_AR_BYTES) -SHADOW_FIELD_RW(GUEST_SS_AR_BYTES) -SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO) -SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE) +SHADOW_FIELD_RO(VM_EXIT_REASON, vm_exit_reason) +SHADOW_FIELD_RO(VM_EXIT_INTR_INFO, vm_exit_intr_info) +SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len) +SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field) +SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code) +SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code) +SHADOW_FIELD_RO(GUEST_CS_AR_BYTES, guest_cs_ar_bytes) +SHADOW_FIELD_RO(GUEST_SS_AR_BYTES, guest_ss_ar_bytes) +SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control) +SHADOW_FIELD_RW(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control) +SHADOW_FIELD_RW(EXCEPTION_BITMAP, exception_bitmap) +SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code) +SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field) +SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len) +SHADOW_FIELD_RW(TPR_THRESHOLD, tpr_threshold) +SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info) +SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value) /* Natural width */ -SHADOW_FIELD_RO(EXIT_QUALIFICATION) -SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS) -SHADOW_FIELD_RW(GUEST_RIP) -SHADOW_FIELD_RW(GUEST_RSP) -SHADOW_FIELD_RW(GUEST_CR0) -SHADOW_FIELD_RW(GUEST_CR3) -SHADOW_FIELD_RW(GUEST_CR4) -SHADOW_FIELD_RW(GUEST_RFLAGS) -SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK) -SHADOW_FIELD_RW(CR0_READ_SHADOW) -SHADOW_FIELD_RW(CR4_READ_SHADOW) -SHADOW_FIELD_RW(HOST_FS_BASE) -SHADOW_FIELD_RW(HOST_GS_BASE) +SHADOW_FIELD_RO(EXIT_QUALIFICATION, exit_qualification) +SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS, guest_linear_address) +SHADOW_FIELD_RW(GUEST_RIP, guest_rip) +SHADOW_FIELD_RW(GUEST_RSP, guest_rsp) +SHADOW_FIELD_RW(GUEST_CR0, guest_cr0) +SHADOW_FIELD_RW(GUEST_CR3, guest_cr3) +SHADOW_FIELD_RW(GUEST_CR4, guest_cr4) +SHADOW_FIELD_RW(GUEST_RFLAGS, guest_rflags) +SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK, cr0_guest_host_mask) +SHADOW_FIELD_RW(CR0_READ_SHADOW, cr0_read_shadow) +SHADOW_FIELD_RW(CR4_READ_SHADOW, cr4_read_shadow) +SHADOW_FIELD_RW(HOST_FS_BASE, host_fs_base) +SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base) /* 64-bit */ -SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS) -SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH) +SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address) +SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, 
guest_physical_address) #undef SHADOW_FIELD_RO #undef SHADOW_FIELD_RW diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d98eac371c0a..69536553446d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -389,6 +389,7 @@ static const struct kvm_vmx_segment_field { }; u64 host_efer; +static unsigned long host_idt_base; /* * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm @@ -1035,6 +1036,33 @@ static void pt_guest_exit(struct vcpu_vmx *vmx) wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl); } +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base) +{ + if (unlikely(fs_sel != host->fs_sel)) { + if (!(fs_sel & 7)) + vmcs_write16(HOST_FS_SELECTOR, fs_sel); + else + vmcs_write16(HOST_FS_SELECTOR, 0); + host->fs_sel = fs_sel; + } + if (unlikely(gs_sel != host->gs_sel)) { + if (!(gs_sel & 7)) + vmcs_write16(HOST_GS_SELECTOR, gs_sel); + else + vmcs_write16(HOST_GS_SELECTOR, 0); + host->gs_sel = gs_sel; + } + if (unlikely(fs_base != host->fs_base)) { + vmcs_writel(HOST_FS_BASE, fs_base); + host->fs_base = fs_base; + } + if (unlikely(gs_base != host->gs_base)) { + vmcs_writel(HOST_GS_BASE, gs_base); + host->gs_base = gs_base; + } +} + void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -1053,20 +1081,18 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) * when guest state is loaded. This happens when guest transitions * to/from long-mode by setting MSR_EFER.LMA. */ - if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) { - vmx->guest_msrs_dirty = false; + if (!vmx->guest_msrs_ready) { + vmx->guest_msrs_ready = true; for (i = 0; i < vmx->save_nmsrs; ++i) kvm_set_shared_msr(vmx->guest_msrs[i].index, vmx->guest_msrs[i].data, vmx->guest_msrs[i].mask); } - - if (vmx->loaded_cpu_state) + if (vmx->guest_state_loaded) return; - vmx->loaded_cpu_state = vmx->loaded_vmcs; - host_state = &vmx->loaded_cpu_state->host_state; + host_state = &vmx->loaded_vmcs->host_state; /* * Set host fs and gs selectors. 
Unfortunately, 22.2.3 does not @@ -1100,42 +1126,20 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) gs_base = segment_base(gs_sel); #endif - if (unlikely(fs_sel != host_state->fs_sel)) { - if (!(fs_sel & 7)) - vmcs_write16(HOST_FS_SELECTOR, fs_sel); - else - vmcs_write16(HOST_FS_SELECTOR, 0); - host_state->fs_sel = fs_sel; - } - if (unlikely(gs_sel != host_state->gs_sel)) { - if (!(gs_sel & 7)) - vmcs_write16(HOST_GS_SELECTOR, gs_sel); - else - vmcs_write16(HOST_GS_SELECTOR, 0); - host_state->gs_sel = gs_sel; - } - if (unlikely(fs_base != host_state->fs_base)) { - vmcs_writel(HOST_FS_BASE, fs_base); - host_state->fs_base = fs_base; - } - if (unlikely(gs_base != host_state->gs_base)) { - vmcs_writel(HOST_GS_BASE, gs_base); - host_state->gs_base = gs_base; - } + vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base); + vmx->guest_state_loaded = true; } static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) { struct vmcs_host_state *host_state; - if (!vmx->loaded_cpu_state) + if (!vmx->guest_state_loaded) return; - WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs); - host_state = &vmx->loaded_cpu_state->host_state; + host_state = &vmx->loaded_vmcs->host_state; ++vmx->vcpu.stat.host_state_reload; - vmx->loaded_cpu_state = NULL; #ifdef CONFIG_X86_64 rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); @@ -1161,13 +1165,15 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif load_fixmap_gdt(raw_smp_processor_id()); + vmx->guest_state_loaded = false; + vmx->guest_msrs_ready = false; } #ifdef CONFIG_X86_64 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) { preempt_disable(); - if (vmx->loaded_cpu_state) + if (vmx->guest_state_loaded) rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); preempt_enable(); return vmx->msr_guest_kernel_gs_base; @@ -1176,7 +1182,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) { preempt_disable(); - if (vmx->loaded_cpu_state) + if (vmx->guest_state_loaded) wrmsrl(MSR_KERNEL_GS_BASE, data); preempt_enable(); vmx->msr_guest_kernel_gs_base = data; @@ -1225,11 +1231,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) pi_set_on(pi_desc); } -/* - * Switches to specified vcpu, until a matching vcpu_put(), but assumes - * vcpu mutex is already taken. - */ -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); bool already_loaded = vmx->loaded_vmcs->cpu == cpu; @@ -1290,8 +1292,20 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (kvm_has_tsc_control && vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) decache_tsc_multiplier(vmx); +} + +/* + * Switches to specified vcpu, until a matching vcpu_put(), but assumes + * vcpu mutex is already taken. 
+ */ +void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmx_vcpu_load_vmcs(vcpu, cpu); vmx_vcpu_pi_load(vcpu, cpu); + vmx->host_pkru = read_pkru(); vmx->host_debugctlmsr = get_debugctlmsr(); } @@ -1310,7 +1324,7 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) pi_set_sn(pi_desc); } -void vmx_vcpu_put(struct kvm_vcpu *vcpu) +static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { vmx_vcpu_pi_put(vcpu); @@ -1579,7 +1593,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) move_msr_up(vmx, index, save_nmsrs++); vmx->save_nmsrs = save_nmsrs; - vmx->guest_msrs_dirty = true; + vmx->guest_msrs_ready = false; if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); @@ -1692,9 +1706,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SYSENTER_ESP: msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP); break; - case MSR_IA32_POWER_CTL: - msr_info->data = vmx->msr_ia32_power_ctl; - break; case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || (!msr_info->host_initiated && @@ -1718,7 +1729,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, &msr_info->data); case MSR_IA32_XSS: - if (!vmx_xsaves_supported()) + if (!vmx_xsaves_supported() || + (!msr_info->host_initiated && + !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) return 1; msr_info->data = vcpu->arch.ia32_xss; break; @@ -1817,17 +1831,28 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; #endif case MSR_IA32_SYSENTER_CS: + if (is_guest_mode(vcpu)) + get_vmcs12(vcpu)->guest_sysenter_cs = data; vmcs_write32(GUEST_SYSENTER_CS, data); break; case MSR_IA32_SYSENTER_EIP: + if (is_guest_mode(vcpu)) + get_vmcs12(vcpu)->guest_sysenter_eip = data; vmcs_writel(GUEST_SYSENTER_EIP, data); break; case MSR_IA32_SYSENTER_ESP: + if (is_guest_mode(vcpu)) + get_vmcs12(vcpu)->guest_sysenter_esp = data; vmcs_writel(GUEST_SYSENTER_ESP, data); break; - case MSR_IA32_POWER_CTL: - vmx->msr_ia32_power_ctl = data; + case MSR_IA32_DEBUGCTLMSR: + if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls & + VM_EXIT_SAVE_DEBUG_CONTROLS) + get_vmcs12(vcpu)->guest_ia32_debugctl = data; + + ret = kvm_set_msr_common(vcpu, msr_info); break; + case MSR_IA32_BNDCFGS: if (!kvm_mpx_supported() || (!msr_info->host_initiated && @@ -1896,9 +1921,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) MSR_TYPE_W); break; case MSR_IA32_CR_PAT: + if (!kvm_pat_valid(data)) + return 1; + + if (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) + get_vmcs12(vcpu)->guest_ia32_pat = data; + if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { - if (!kvm_pat_valid(data)) - return 1; vmcs_write64(GUEST_IA32_PAT, data); vcpu->arch.pat = data; break; @@ -1932,7 +1962,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; return vmx_set_vmx_msr(vcpu, msr_index, data); case MSR_IA32_XSS: - if (!vmx_xsaves_supported()) + if (!vmx_xsaves_supported() || + (!msr_info->host_initiated && + !(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)))) return 1; /* * The only supported bit as of Skylake is bit 8, but @@ -2435,6 +2468,7 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) return -ENOMEM; loaded_vmcs->shadow_vmcs = NULL; + loaded_vmcs->hv_timer_soft_disabled = false; loaded_vmcs_init(loaded_vmcs); if 
(cpu_has_vmx_msr_bitmap()) { @@ -2455,6 +2489,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) } memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state)); + memset(&loaded_vmcs->controls_shadow, 0, + sizeof(struct vmcs_controls_shadow)); return 0; @@ -2737,7 +2773,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) (unsigned long *)&vcpu->arch.regs_dirty)) return; - if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (is_pae_paging(vcpu)) { vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); @@ -2749,7 +2785,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (is_pae_paging(vcpu)) { mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); @@ -2766,22 +2802,20 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, unsigned long cr0, struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) vmx_decache_cr3(vcpu); if (!(cr0 & X86_CR0_PG)) { /* From paging/starting to nonpaging */ - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) | - (CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING)); + exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); vcpu->arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); } else if (!is_paging(vcpu)) { /* From nonpaging to paging */ - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, - vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & - ~(CPU_BASED_CR3_LOAD_EXITING | - CPU_BASED_CR3_STORE_EXITING)); + exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING | + CPU_BASED_CR3_STORE_EXITING); vcpu->arch.cr0 = cr0; vmx_set_cr4(vcpu, kvm_read_cr4(vcpu)); } @@ -2881,6 +2915,7 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { + struct vcpu_vmx *vmx = to_vmx(vcpu); /* * Pass through host's Machine Check Enable value to hw_cr4, which * is in force while we are in guest mode. 
Do not let guests control @@ -2891,20 +2926,19 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); if (enable_unrestricted_guest) hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; - else if (to_vmx(vcpu)->rmode.vm86_active) + else if (vmx->rmode.vm86_active) hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; else hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { if (cr4 & X86_CR4_UMIP) { - vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_DESC); + secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC); hw_cr4 &= ~X86_CR4_UMIP; } else if (!is_guest_mode(vcpu) || - !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_DESC); + !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) { + secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC); + } } if (cr4 & X86_CR4_VMXE) { @@ -2919,7 +2953,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; } - if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) + if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4)) return 1; vcpu->arch.cr4 = cr4; @@ -3537,7 +3571,7 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu) u8 mode = 0; if (cpu_has_secondary_exec_ctrls() && - (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & + (secondary_exec_controls_get(to_vmx(vcpu)) & SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { mode |= MSR_BITMAP_MODE_X2APIC; if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) @@ -3731,7 +3765,6 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) { u32 low32, high32; unsigned long tmpl; - struct desc_ptr dt; unsigned long cr0, cr3, cr4; cr0 = read_cr0(); @@ -3767,9 +3800,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ - store_idt(&dt); - vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ - vmx->host_idt_base = dt.address; + vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */ vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */ @@ -3798,7 +3829,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); } -static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) +u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) { u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; @@ -3808,8 +3839,9 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) if (!enable_vnmi) pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; - /* Enable the preemption timer dynamically */ - pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + if (!enable_preemption_timer) + pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + return pin_based_exec_ctrl; } @@ -3817,14 +3849,14 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); + pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); if (cpu_has_secondary_exec_ctrls()) { if (kvm_vcpu_apicv_active(vcpu)) - vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, + secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); else - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, + secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); } @@ -4015,15 +4047,14 @@ static void 
vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ /* Control */ - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); + pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); vmx->hv_deadline_tsc = -1; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx)); + exec_controls_set(vmx, vmx_exec_control(vmx)); if (cpu_has_secondary_exec_ctrls()) { vmx_compute_secondary_exec_control(vmx); - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, - vmx->secondary_exec_control); + secondary_exec_controls_set(vmx, vmx->secondary_exec_control); } if (kvm_vcpu_apicv_active(&vmx->vcpu)) { @@ -4081,10 +4112,10 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } - vm_exit_controls_init(vmx, vmx_vmexit_ctrl()); + vm_exit_controls_set(vmx, vmx_vmexit_ctrl()); /* 22.2.1, 20.8.1 */ - vm_entry_controls_init(vmx, vmx_vmentry_ctrl()); + vm_entry_controls_set(vmx, vmx_vmentry_ctrl()); vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); @@ -4208,8 +4239,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) static void enable_irq_window(struct kvm_vcpu *vcpu) { - vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, - CPU_BASED_VIRTUAL_INTR_PENDING); + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING); } static void enable_nmi_window(struct kvm_vcpu *vcpu) @@ -4220,8 +4250,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) return; } - vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, - CPU_BASED_VIRTUAL_NMI_PENDING); + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING); } static void vmx_inject_irq(struct kvm_vcpu *vcpu) @@ -4442,11 +4471,11 @@ static void kvm_machine_check(void) static int handle_machine_check(struct kvm_vcpu *vcpu) { - /* already handled by vcpu_run */ + /* handled by vmx_vcpu_run() */ return 1; } -static int handle_exception(struct kvm_vcpu *vcpu) +static int handle_exception_nmi(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_run *kvm_run = vcpu->run; @@ -4458,11 +4487,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) vect_info = vmx->idt_vectoring_info; intr_info = vmx->exit_intr_info; - if (is_machine_check(intr_info)) - return handle_machine_check(vcpu); - - if (is_nmi(intr_info)) - return 1; /* already handled by vmx_vcpu_run() */ + if (is_machine_check(intr_info) || is_nmi(intr_info)) + return 1; /* handled by handle_exception_nmi_irqoff() */ if (is_invalid_opcode(intr_info)) return handle_ud(vcpu); @@ -4518,7 +4544,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) dr6 = vmcs_readl(EXIT_QUALIFICATION); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { - vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; if (is_icebp(intr_info)) skip_emulated_instruction(vcpu); @@ -4763,7 +4789,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_DEBUG; return 0; } else { - vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= DR6_BD | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); return 1; @@ -4771,8 +4797,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) } if (vcpu->guest_debug == 0) { - vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, - CPU_BASED_MOV_DR_EXITING); + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); /* * No more DR vmexits; force a reload of the debug registers @@ -4816,7 +4841,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) vcpu->arch.dr7 = 
vmcs_readl(GUEST_DR7); vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; - vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING); + exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING); } static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) @@ -4876,8 +4901,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) static int handle_interrupt_window(struct kvm_vcpu *vcpu) { - vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, - CPU_BASED_VIRTUAL_INTR_PENDING); + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5131,8 +5155,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) static int handle_nmi_window(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(!enable_vnmi); - vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, - CPU_BASED_VIRTUAL_NMI_PENDING); + exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING); ++vcpu->stat.nmi_window_exits; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5144,7 +5167,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); enum emulation_result err = EMULATE_DONE; int ret = 1; - u32 cpu_exec_ctrl; bool intr_window_requested; unsigned count = 130; @@ -5155,8 +5177,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) */ WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); - cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; + intr_window_requested = exec_controls_get(vmx) & + CPU_BASED_VIRTUAL_INTR_PENDING; while (vmx->emulation_required && count-- != 0) { if (intr_window_requested && vmx_interrupt_allowed(vcpu)) @@ -5342,7 +5364,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) * is read even if it isn't needed (e.g., for type==all) */ if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), - vmx_instruction_info, false, &gva)) + vmx_instruction_info, false, + sizeof(operand), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { @@ -5437,8 +5460,12 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) static int handle_preemption_timer(struct kvm_vcpu *vcpu) { - if (!to_vmx(vcpu)->req_immediate_exit) + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!vmx->req_immediate_exit && + !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) kvm_lapic_expired_hv_timer(vcpu); + return 1; } @@ -5469,7 +5496,7 @@ static int handle_encls(struct kvm_vcpu *vcpu) * to be done to userspace and return 0. */ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { - [EXIT_REASON_EXCEPTION_NMI] = handle_exception, + [EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, @@ -5952,6 +5979,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); u32 sec_exec_control; if (!lapic_in_kernel(vcpu)) @@ -5963,11 +5991,11 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) /* Postpone execution until vmcs01 is the current VMCS. 
*/ if (is_guest_mode(vcpu)) { - to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; + vmx->nested.change_vmcs01_virtual_apic_mode = true; return; } - sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + sec_exec_control = secondary_exec_controls_get(vmx); sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); @@ -5989,7 +6017,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; break; } - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); + secondary_exec_controls_set(vmx, sec_exec_control); vmx_update_msr_bitmap(vcpu); } @@ -6107,76 +6135,81 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); } -static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) +static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) { - u32 exit_intr_info = 0; - u16 basic_exit_reason = (u16)vmx->exit_reason; - - if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY - || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI)) - return; - - if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - vmx->exit_intr_info = exit_intr_info; + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); /* if exit due to PF check for async PF */ - if (is_page_fault(exit_intr_info)) + if (is_page_fault(vmx->exit_intr_info)) vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); /* Handle machine checks before interrupts are enabled */ - if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY || - is_machine_check(exit_intr_info)) + if (is_machine_check(vmx->exit_intr_info)) kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - if (is_nmi(exit_intr_info)) { + if (is_nmi(vmx->exit_intr_info)) { kvm_before_interrupt(&vmx->vcpu); asm("int $2"); kvm_after_interrupt(&vmx->vcpu); } } -static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) +static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) { - u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK)) - == (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) { - unsigned int vector; - unsigned long entry; - gate_desc *desc; - struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned int vector; + unsigned long entry; #ifdef CONFIG_X86_64 - unsigned long tmp; + unsigned long tmp; #endif + gate_desc *desc; + u32 intr_info; + + intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + if (WARN_ONCE(!is_external_intr(intr_info), + "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) + return; - vector = exit_intr_info & INTR_INFO_VECTOR_MASK; - desc = (gate_desc *)vmx->host_idt_base + vector; - entry = gate_offset(desc); - asm volatile( + vector = intr_info & INTR_INFO_VECTOR_MASK; + desc = (gate_desc *)host_idt_base + vector; + entry = gate_offset(desc); + + kvm_before_interrupt(vcpu); + + asm volatile( #ifdef CONFIG_X86_64 - "mov %%" _ASM_SP ", %[sp]\n\t" - "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" - "push $%c[ss]\n\t" - "push %[sp]\n\t" + "mov %%" _ASM_SP ", %[sp]\n\t" + "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" + "push $%c[ss]\n\t" + "push %[sp]\n\t" #endif - "pushf\n\t" - __ASM_SIZE(push) " $%c[cs]\n\t" - CALL_NOSPEC - : + "pushf\n\t" + __ASM_SIZE(push) " $%c[cs]\n\t" + CALL_NOSPEC + : #ifdef CONFIG_X86_64 - [sp]"=&r"(tmp), + [sp]"=&r"(tmp), #endif - ASM_CALL_CONSTRAINT - : - THUNK_TARGET(entry), - [ss]"i"(__KERNEL_DS), - 
[cs]"i"(__KERNEL_CS) - ); - } + ASM_CALL_CONSTRAINT + : + THUNK_TARGET(entry), + [ss]"i"(__KERNEL_DS), + [cs]"i"(__KERNEL_CS) + ); + + kvm_after_interrupt(vcpu); +} +STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); + +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) + handle_external_interrupt_irqoff(vcpu); + else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) + handle_exception_nmi_irqoff(vmx); } -STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); static bool vmx_has_emulated_msr(int index) { @@ -6187,6 +6220,8 @@ static bool vmx_has_emulated_msr(int index) * real mode. */ return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + return nested; case MSR_AMD64_VIRT_SPEC_CTRL: /* This is AMD only. */ return false; @@ -6332,15 +6367,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host, false); } -static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val) -{ - vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val); - if (!vmx->loaded_vmcs->hv_timer_armed) - vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); - vmx->loaded_vmcs->hv_timer_armed = true; -} - static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6348,11 +6374,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) u32 delta_tsc; if (vmx->req_immediate_exit) { - vmx_arm_hv_timer(vmx, 0); - return; - } - - if (vmx->hv_deadline_tsc != -1) { + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0); + vmx->loaded_vmcs->hv_timer_soft_disabled = false; + } else if (vmx->hv_deadline_tsc != -1) { tscl = rdtsc(); if (vmx->hv_deadline_tsc > tscl) /* set_hv_timer ensures the delta fits in 32-bits */ @@ -6361,14 +6385,12 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) else delta_tsc = 0; - vmx_arm_hv_timer(vmx, delta_tsc); - return; + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc); + vmx->loaded_vmcs->hv_timer_soft_disabled = false; + } else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) { + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1); + vmx->loaded_vmcs->hv_timer_soft_disabled = true; } - - if (vmx->loaded_vmcs->hv_timer_armed) - vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, - PIN_BASED_VMX_PREEMPTION_TIMER); - vmx->loaded_vmcs->hv_timer_armed = false; } void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) @@ -6401,8 +6423,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_write32(PLE_WINDOW, vmx->ple_window); } - if (vmx->nested.need_vmcs12_sync) - nested_sync_from_vmcs12(vcpu); + if (vmx->nested.need_vmcs12_to_shadow_sync) + nested_sync_vmcs12_to_shadow(vcpu); if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]); @@ -6440,7 +6462,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) atomic_switch_perf_msrs(vmx); - vmx_update_hv_timer(vcpu); + if (enable_preemption_timer) + vmx_update_hv_timer(vcpu); + + if (lapic_in_kernel(vcpu) && + vcpu->arch.apic->lapic_timer.timer_advance_ns) + kvm_wait_lapic_expire(vcpu); /* * If this vCPU has touched SPEC_CTRL, restore the guest's value if @@ -6533,13 +6560,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->idt_vectoring_info = 0; vmx->exit_reason = vmx->fail ? 
0xdead : vmcs_read32(VM_EXIT_REASON); + if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) + kvm_machine_check(); + if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) return; vmx->loaded_vmcs->launched = 1; vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); - vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); } @@ -6630,6 +6659,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW); vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW); + if (kvm_cstate_in_guest(kvm)) { + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R); + vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R); + } vmx->msr_bitmap_mode = 0; vmx->loaded_vmcs = &vmx->vmcs01; @@ -6726,22 +6761,22 @@ static int vmx_vm_init(struct kvm *kvm) return 0; } -static void __init vmx_check_processor_compat(void *rtn) +static int __init vmx_check_processor_compat(void) { struct vmcs_config vmcs_conf; struct vmx_capability vmx_cap; - *(int *)rtn = 0; if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) - *(int *)rtn = -EIO; + return -EIO; if (nested) nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept, enable_apicv); if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", smp_processor_id()); - *(int *)rtn = -EIO; + return -EIO; } + return 0; } static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) @@ -6795,7 +6830,7 @@ static int vmx_get_lpage_level(void) return PT_PDPE_LEVEL; } -static void vmcs_set_secondary_exec_control(u32 new_ctl) +static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx) { /* * These bits in the secondary execution controls field @@ -6809,10 +6844,10 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC; - u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + u32 new_ctl = vmx->secondary_exec_control; + u32 cur_ctl = secondary_exec_controls_get(vmx); - vmcs_write32(SECONDARY_VM_EXEC_CONTROL, - (new_ctl & ~mask) | (cur_ctl & mask)); + secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask)); } /* @@ -6950,7 +6985,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) if (cpu_has_secondary_exec_ctrls()) { vmx_compute_secondary_exec_control(vmx); - vmcs_set_secondary_exec_control(vmx->secondary_exec_control); + vmcs_set_secondary_exec_control(vmx); } if (nested_vmx_allowed(vcpu)) @@ -7424,10 +7459,14 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) static __init int hardware_setup(void) { unsigned long host_bndcfgs; + struct desc_ptr dt; int r, i; rdmsrl_safe(MSR_EFER, &host_efer); + store_idt(&dt); + host_idt_base = dt.address; + for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) kvm_define_shared_msr(i, vmx_msr_index[i]); @@ -7531,17 +7570,33 @@ static __init int hardware_setup(void) } if (!cpu_has_vmx_preemption_timer()) - kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; + enable_preemption_timer = false; - if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) { + if (enable_preemption_timer) { + 
u64 use_timer_freq = 5000ULL * 1000 * 1000; u64 vmx_msr; rdmsrl(MSR_IA32_VMX_MISC, vmx_msr); cpu_preemption_timer_multi = vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; - } else { + + if (tsc_khz) + use_timer_freq = (u64)tsc_khz * 1000; + use_timer_freq >>= cpu_preemption_timer_multi; + + /* + * KVM "disables" the preemption timer by setting it to its max + * value. Don't use the timer if it might cause spurious exits + * at a rate faster than 0.1 Hz (of uninterrupted guest time). + */ + if (use_timer_freq > 0xffffffffu / 10) + enable_preemption_timer = false; + } + + if (!enable_preemption_timer) { kvm_x86_ops->set_hv_timer = NULL; kvm_x86_ops->cancel_hv_timer = NULL; + kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit; } kvm_set_posted_intr_wakeup_handler(wakeup_handler); @@ -7683,7 +7738,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .set_tdp_cr3 = vmx_set_cr3, .check_intercept = vmx_check_intercept, - .handle_external_intr = vmx_handle_external_intr, + .handle_exit_irqoff = vmx_handle_exit_irqoff, .mpx_supported = vmx_mpx_supported, .xsaves_supported = vmx_xsaves_supported, .umip_emulated = vmx_umip_emulated, diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 61128b48c503..82d0bc3a4d52 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -109,14 +109,21 @@ struct nested_vmx { * to guest memory during VM exit. */ struct vmcs12 *cached_shadow_vmcs12; + /* * Indicates if the shadow vmcs or enlightened vmcs must be updated * with the data held by struct vmcs12. */ - bool need_vmcs12_sync; + bool need_vmcs12_to_shadow_sync; bool dirty_vmcs12; /* + * Indicates lazily loaded guest state has not yet been decached from + * vmcs02. + */ + bool need_sync_vmcs02_to_vmcs12_rare; + + /* * vmcs02 has been initialized, i.e. state that is constant for * vmcs02 has been written to the backing VMCS. Initialization * is delayed until L1 actually attempts to run a nested VM. @@ -180,14 +187,24 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; u8 fail; u8 msr_bitmap_mode; + + /* + * If true, host state has been stored in vmx->loaded_vmcs for + * the CPU registers that only need to be switched when transitioning + * to/from the kernel, and the registers have been loaded with guest + * values. If false, host state is loaded in the CPU registers + * and vmx->loaded_vmcs->host_state is invalid. + */ + bool guest_state_loaded; + u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; + struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; - bool guest_msrs_dirty; - unsigned long host_idt_base; + bool guest_msrs_ready; #ifdef CONFIG_X86_64 u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; @@ -195,21 +212,15 @@ struct vcpu_vmx { u64 spec_ctrl; - u32 vm_entry_controls_shadow; - u32 vm_exit_controls_shadow; u32 secondary_exec_control; /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. For a nested - * guest (L2), it points to a different VMCS. loaded_cpu_state points - * to the VMCS whose state is loaded into the CPU registers that only - * need to be switched when transitioning to/from the kernel; a NULL - * value indicates that host state is loaded. + * guest (L2), it points to a different VMCS. 
*/ struct loaded_vmcs vmcs01; struct loaded_vmcs *loaded_vmcs; - struct loaded_vmcs *loaded_cpu_state; struct msr_autoload { struct vmx_msrs guest; @@ -260,8 +271,6 @@ struct vcpu_vmx { unsigned long host_debugctlmsr; - u64 msr_ia32_power_ctl; - /* * Only bits masked by msr_ia32_feature_control_valid_bits can be set in * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included @@ -292,12 +301,14 @@ struct kvm_vmx { }; bool nested_vmx_allowed(struct kvm_vcpu *vcpu); +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu); void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -void vmx_vcpu_put(struct kvm_vcpu *vcpu); int allocate_vpid(void); void free_vpid(int vpid); void vmx_set_constant_host_state(struct vcpu_vmx *vmx); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); +void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel, + unsigned long fs_base, unsigned long gs_base); int vmx_get_cpl(struct kvm_vcpu *vcpu); unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu); void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); @@ -376,69 +387,31 @@ static inline u8 vmx_get_rvi(void) return vmcs_read16(GUEST_INTR_STATUS) & 0xff; } -static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx) -{ - vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS); -} - -static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) -{ - vmcs_write32(VM_ENTRY_CONTROLS, val); - vmx->vm_entry_controls_shadow = val; -} - -static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val) -{ - if (vmx->vm_entry_controls_shadow != val) - vm_entry_controls_init(vmx, val); -} - -static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx) -{ - return vmx->vm_entry_controls_shadow; -} - -static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val) -{ - vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val); -} - -static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) -{ - vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); -} - -static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx) -{ - vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS); -} - -static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) -{ - vmcs_write32(VM_EXIT_CONTROLS, val); - vmx->vm_exit_controls_shadow = val; -} - -static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val) -{ - if (vmx->vm_exit_controls_shadow != val) - vm_exit_controls_init(vmx, val); -} - -static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx) -{ - return vmx->vm_exit_controls_shadow; -} - -static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val) -{ - vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val); -} - -static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val) -{ - vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val); +#define BUILD_CONTROLS_SHADOW(lname, uname) \ +static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \ +{ \ + if (vmx->loaded_vmcs->controls_shadow.lname != val) { \ + vmcs_write32(uname, val); \ + vmx->loaded_vmcs->controls_shadow.lname = val; \ + } \ +} \ +static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \ +{ \ + return vmx->loaded_vmcs->controls_shadow.lname; \ +} \ +static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \ +{ \ + lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \ +} \ +static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 
val) \ +{ \ + lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \ } +BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS) +BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS) +BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL) +BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL) +BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL) static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) { @@ -468,6 +441,7 @@ static inline u32 vmx_vmexit_ctrl(void) } u32 vmx_exec_control(struct vcpu_vmx *vmx); +u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx); static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9857992d4e58..4a0b74ecd1de 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -67,6 +67,7 @@ #include <asm/mshyperv.h> #include <asm/hypervisor.h> #include <asm/intel_pt.h> +#include <clocksource/hyperv_timer.h> #define CREATE_TRACE_POINTS #include "trace.h" @@ -716,7 +717,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) gfn_t gfn; int r; - if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu)) + if (!is_pae_paging(vcpu)) return false; if (!test_bit(VCPU_EXREG_PDPTR, @@ -959,8 +960,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (is_long_mode(vcpu) && (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) return 1; - else if (is_pae(vcpu) && is_paging(vcpu) && - !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) + else if (is_pae_paging(vcpu) && + !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) return 1; kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); @@ -1173,7 +1174,28 @@ static u32 emulated_msrs[] = { MSR_AMD64_VIRT_SPEC_CTRL, MSR_IA32_POWER_CTL, + /* + * The following list leaves out MSRs whose values are determined + * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs. + * We always support the "true" VMX control MSRs, even if the host + * processor does not, so I am putting these registers here rather + * than in msrs_to_save. 
+ */ + MSR_IA32_VMX_BASIC, + MSR_IA32_VMX_TRUE_PINBASED_CTLS, + MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + MSR_IA32_VMX_TRUE_EXIT_CTLS, + MSR_IA32_VMX_TRUE_ENTRY_CTLS, + MSR_IA32_VMX_MISC, + MSR_IA32_VMX_CR0_FIXED0, + MSR_IA32_VMX_CR4_FIXED0, + MSR_IA32_VMX_VMCS_ENUM, + MSR_IA32_VMX_PROCBASED_CTLS2, + MSR_IA32_VMX_EPT_VPID_CAP, + MSR_IA32_VMX_VMFUNC, + MSR_K7_HWCR, + MSR_KVM_POLL_CONTROL, }; static unsigned num_emulated_msrs; @@ -1209,11 +1231,12 @@ static u32 msr_based_features[] = { static unsigned int num_msr_based_features; -u64 kvm_get_arch_capabilities(void) +static u64 kvm_get_arch_capabilities(void) { - u64 data; + u64 data = 0; - rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data); /* * If we're doing cache flushes (either "always" or "cond") @@ -1229,7 +1252,6 @@ u64 kvm_get_arch_capabilities(void) return data; } -EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); static int kvm_get_msr_feature(struct kvm_msr_entry *msr) { @@ -1554,7 +1576,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) vcpu->arch.tsc_always_catchup = 1; return 0; } else { - WARN(1, "user requested TSC rate below hardware speed\n"); + pr_warn_ratelimited("user requested TSC rate below hardware speed\n"); return -1; } } @@ -1564,8 +1586,8 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) user_tsc_khz, tsc_khz); if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { - WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", - user_tsc_khz); + pr_warn_ratelimited("Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); return -1; } @@ -1728,7 +1750,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); offset = kvm_compute_tsc_offset(vcpu, data); - ns = ktime_get_boot_ns(); + ns = ktime_get_boottime_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { @@ -2070,7 +2092,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) spin_lock(&ka->pvclock_gtod_sync_lock); if (!ka->use_master_clock) { spin_unlock(&ka->pvclock_gtod_sync_lock); - return ktime_get_boot_ns() + ka->kvmclock_offset; + return ktime_get_boottime_ns() + ka->kvmclock_offset; } hv_clock.tsc_timestamp = ka->master_cycle_now; @@ -2086,7 +2108,7 @@ u64 get_kvmclock_ns(struct kvm *kvm) &hv_clock.tsc_to_system_mul); ret = __pvclock_read_cycles(&hv_clock, rdtsc()); } else - ret = ktime_get_boot_ns() + ka->kvmclock_offset; + ret = ktime_get_boottime_ns() + ka->kvmclock_offset; put_cpu(); @@ -2185,7 +2207,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) } if (!use_master_clock) { host_tsc = rdtsc(); - kernel_ns = ktime_get_boot_ns(); + kernel_ns = ktime_get_boottime_ns(); } tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); @@ -2544,13 +2566,24 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } break; case MSR_IA32_MISC_ENABLE: - vcpu->arch.ia32_misc_enable_msr = data; + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) && + ((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) { + if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3)) + return 1; + vcpu->arch.ia32_misc_enable_msr = data; + kvm_update_cpuid(vcpu); + } else { + vcpu->arch.ia32_misc_enable_msr = data; + } break; case MSR_IA32_SMBASE: if (!msr_info->host_initiated) return 1; vcpu->arch.smbase = data; break; + case MSR_IA32_POWER_CTL: + vcpu->arch.msr_ia32_power_ctl = data; + break; case 
MSR_IA32_TSC: kvm_write_tsc(vcpu, msr_info); break; @@ -2625,6 +2658,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; break; + case MSR_KVM_POLL_CONTROL: + /* only enable bit supported */ + if (data & (-1ULL << 1)) + return 1; + + vcpu->arch.msr_kvm_poll_control = data; + break; + case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: @@ -2802,6 +2843,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; msr_info->data = vcpu->arch.arch_capabilities; break; + case MSR_IA32_POWER_CTL: + msr_info->data = vcpu->arch.msr_ia32_power_ctl; + break; case MSR_IA32_TSC: msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; break; @@ -2874,6 +2918,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_KVM_PV_EOI_EN: msr_info->data = vcpu->arch.pv_eoi.msr_val; break; + case MSR_KVM_POLL_CONTROL: + msr_info->data = vcpu->arch.msr_kvm_poll_control; + break; case MSR_IA32_P5_MC_ADDR: case MSR_IA32_P5_MC_TYPE: case MSR_IA32_MCG_CAP: @@ -3083,6 +3130,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_PMU_EVENT_FILTER: case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: @@ -3095,7 +3143,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_TSC_STABLE; break; case KVM_CAP_X86_DISABLE_EXITS: - r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE; + r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE | + KVM_X86_DISABLE_EXITS_CSTATE; if(kvm_can_mwait_in_guest()) r |= KVM_X86_DISABLE_EXITS_MWAIT; break; @@ -4612,6 +4661,8 @@ split_irqchip_unlock: kvm->arch.hlt_in_guest = true; if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) kvm->arch.pause_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) + kvm->arch.cstate_in_guest = true; r = 0; break; case KVM_CAP_MSR_PLATFORM_INFO: @@ -4926,6 +4977,9 @@ set_identity_unlock: r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); break; } + case KVM_SET_PMU_EVENT_FILTER: + r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); + break; default: r = -ENOTTY; } @@ -6378,7 +6432,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) vcpu->arch.db); if (dr6 != 0) { - vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 &= ~DR_TRAP_BITS; vcpu->arch.dr6 |= dr6 | DR6_RTM; kvm_queue_exception(vcpu, DB_VECTOR); *r = EMULATE_DONE; @@ -6705,7 +6759,7 @@ static void kvm_hyperv_tsc_notifier(void) struct kvm_vcpu *vcpu; int cpu; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_make_mclock_inprogress_request(kvm); @@ -6731,7 +6785,7 @@ static void kvm_hyperv_tsc_notifier(void) spin_unlock(&ka->pvclock_gtod_sync_lock); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } #endif @@ -6782,17 +6836,17 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu) smp_call_function_single(cpu, tsc_khz_changed, freq, 1); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != cpu) continue; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); - if (vcpu->cpu != smp_processor_id()) + if (vcpu->cpu != raw_smp_processor_id()) send_ipi = 1; } } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -6907,35 +6961,6 @@ 
static struct perf_guest_info_callbacks kvm_guest_cbs = { .handle_intel_pt_intr = kvm_handle_intel_pt_intr, }; -static void kvm_set_mmio_spte_mask(void) -{ - u64 mask; - int maxphyaddr = boot_cpu_data.x86_phys_bits; - - /* - * Set the reserved bits and the present bit of an paging-structure - * entry to generate page fault with PFER.RSV = 1. - */ - - /* - * Mask the uppermost physical address bit, which would be reserved as - * long as the supported physical address width is less than 52. - */ - mask = 1ull << 51; - - /* Set the present bit. */ - mask |= 1ull; - - /* - * If reserved bit is not supported, clear the present bit to disable - * mmio page fault. - */ - if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) - mask &= ~1ull; - - kvm_mmu_set_mmio_spte_mask(mask, mask); -} - #ifdef CONFIG_X86_64 static void pvclock_gtod_update_fn(struct work_struct *work) { @@ -6944,12 +6969,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); atomic_set(&kvm_guest_has_master_clock, 0); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); @@ -7032,8 +7057,6 @@ int kvm_arch_init(void *opaque) if (r) goto out_free_percpu; - kvm_set_mmio_spte_mask(); - kvm_x86_ops = ops; kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, @@ -7172,6 +7195,23 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu); } +static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id) +{ + struct kvm_vcpu *target = NULL; + struct kvm_apic_map *map; + + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + + if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id]) + target = map->phys_map[dest_id]->vcpu; + + rcu_read_unlock(); + + if (target) + kvm_vcpu_yield_to(target); +} + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; @@ -7218,6 +7258,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_SEND_IPI: ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit); break; + case KVM_HC_SCHED_YIELD: + kvm_sched_yield(vcpu->kvm, a0); + ret = 0; + break; default: ret = -KVM_ENOSYS; break; @@ -7950,9 +7994,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } trace_kvm_entry(vcpu->vcpu_id); - if (lapic_in_kernel(vcpu) && - vcpu->arch.apic->lapic_timer.timer_advance_ns) - wait_lapic_expire(vcpu); guest_enter_irqoff(); fpregs_assert_state_consistent(); @@ -8001,13 +8042,29 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_before_interrupt(vcpu); - kvm_x86_ops->handle_external_intr(vcpu); - kvm_after_interrupt(vcpu); + kvm_x86_ops->handle_exit_irqoff(vcpu); + /* + * Consume any pending interrupts, including the possible source of + * VM-Exit on SVM and any ticks that occur between VM-Exit and now. + * An instruction is required after local_irq_enable() to fully unblock + * interrupts on processors that implement an interrupt shadow, the + * stat.exits increment will do nicely. 
+ */ + kvm_before_interrupt(vcpu); + local_irq_enable(); ++vcpu->stat.exits; + local_irq_disable(); + kvm_after_interrupt(vcpu); guest_exit_irqoff(); + if (lapic_in_kernel(vcpu)) { + s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta; + if (delta != S64_MIN) { + trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta); + vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN; + } + } local_irq_enable(); preempt_enable(); @@ -8593,7 +8650,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); - if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) { + if (is_pae_paging(vcpu)) { load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); mmu_reset_needed = 1; } @@ -8874,6 +8931,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) msr.host_initiated = true; kvm_write_tsc(vcpu, &msr); vcpu_put(vcpu); + + /* poll control enabled by default */ + vcpu->arch.msr_kvm_poll_control = 1; + mutex_unlock(&vcpu->mutex); if (!kvmclock_periodic_sync) @@ -9015,7 +9076,7 @@ int kvm_arch_hardware_enable(void) * before any KVM threads can be running. Unfortunately, we can't * bring the TSCs fully up to date with real time, as we aren't yet far * enough into CPU bringup that we know how much real time has actually - * elapsed; our helper function, ktime_get_boot_ns() will be using boot + * elapsed; our helper function, ktime_get_boottime_ns() will be using boot * variables that haven't been updated yet. * * So we simply find the maximum observed TSC above, then record the @@ -9106,9 +9167,9 @@ void kvm_arch_hardware_unsetup(void) kvm_x86_ops->hardware_unsetup(); } -void kvm_arch_check_processor_compat(void *rtn) +int kvm_arch_check_processor_compat(void) { - kvm_x86_ops->check_processor_compatibility(rtn); + return kvm_x86_ops->check_processor_compatibility(); } bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) @@ -9243,7 +9304,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) mutex_init(&kvm->arch.apic_map_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); - kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); + kvm->arch.kvmclock_offset = -ktime_get_boottime_ns(); pvclock_update_vm_gtod_copy(kvm); kvm->arch.guest_can_read_msr_platform_info = true; @@ -9380,6 +9441,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); + kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); kvm_page_track_cleanup(kvm); kvm_hv_destroy_vm(kvm); @@ -9788,6 +9850,36 @@ static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val) sizeof(u32)); } +static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) + return false; + + if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || + (vcpu->arch.apf.send_user_only && + kvm_x86_ops->get_cpl(vcpu) == 0)) + return false; + + return true; +} + +bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) +{ + if (unlikely(!lapic_in_kernel(vcpu) || + kvm_event_needs_reinjection(vcpu) || + vcpu->arch.exception.pending)) + return false; + + if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu)) + return false; + + /* + * If interrupts are off we cannot even use an artificial + * halt state. 
+ */ + return kvm_x86_ops->interrupt_allowed(vcpu); +} + void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { @@ -9796,11 +9888,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, trace_kvm_async_pf_not_present(work->arch.token, work->gva); kvm_add_async_pf_gfn(vcpu, work->arch.gfn); - if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || - (vcpu->arch.apf.send_user_only && - kvm_x86_ops->get_cpl(vcpu) == 0)) - kvm_make_request(KVM_REQ_APF_HALT, vcpu); - else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { + if (kvm_can_deliver_async_pf(vcpu) && + !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { fault.vector = PF_VECTOR; fault.error_code_valid = true; fault.error_code = 0; @@ -9808,6 +9897,16 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, fault.address = work->arch.token; fault.async_page_fault = true; kvm_inject_page_fault(vcpu, &fault); + } else { + /* + * It is not possible to deliver a paravirtualized asynchronous + * page fault, but putting the guest in an artificial halt state + * can be beneficial nevertheless: if an interrupt arrives, we + * can deliver it timely and perhaps the guest will schedule + * another process. When the instruction that triggered a page + * fault is retried, hopefully the page will be ready in the host. + */ + kvm_make_request(KVM_REQ_APF_HALT, vcpu); } } @@ -9948,6 +10047,13 @@ bool kvm_vector_hashing_enabled(void) } EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled); +bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.msr_kvm_poll_control & 1) == 0; +} +EXPORT_SYMBOL_GPL(kvm_arch_no_poll); + + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index a470ff0868c5..e08a12892e8b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -139,6 +139,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu) return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG)); } +static inline bool is_pae_paging(struct kvm_vcpu *vcpu) +{ + return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu); +} + static inline u32 bit(int bitno) { return 1 << (bitno & 31); @@ -333,6 +338,11 @@ static inline bool kvm_pause_in_guest(struct kvm *kvm) return kvm->arch.pause_in_guest; } +static inline bool kvm_cstate_in_guest(struct kvm *kvm) +{ + return kvm->arch.cstate_in_guest; +} + DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c index 1811fa4a1b1a..7c48ff4ae8d1 100644 --- a/arch/x86/lib/cache-smp.c +++ b/arch/x86/lib/cache-smp.c @@ -15,6 +15,7 @@ EXPORT_SYMBOL(wbinvd_on_cpu); int wbinvd_on_all_cpus(void) { - return on_each_cpu(__wbinvd, NULL, 1); + on_each_cpu(__wbinvd, NULL, 1); + return 0; } EXPORT_SYMBOL(wbinvd_on_all_cpus); diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index c6f4982d5401..39001a401eff 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -26,8 +26,6 @@ static int ptdump_curknl_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump_curknl); #ifdef CONFIG_PAGE_TABLE_ISOLATION -static struct dentry *pe_curusr; - static int ptdump_curusr_show(struct seq_file *m, void *v) { if (current->mm->pgd) { @@ -42,8 +40,6 @@ DEFINE_SHOW_ATTRIBUTE(ptdump_curusr); #endif #if defined(CONFIG_EFI) && defined(CONFIG_X86_64) -static struct dentry *pe_efi; - static int 
ptdump_efi_show(struct seq_file *m, void *v) { if (efi_mm.pgd) @@ -54,41 +50,24 @@ static int ptdump_efi_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump_efi); #endif -static struct dentry *dir, *pe_knl, *pe_curknl; +static struct dentry *dir; static int __init pt_dump_debug_init(void) { dir = debugfs_create_dir("page_tables", NULL); - if (!dir) - return -ENOMEM; - - pe_knl = debugfs_create_file("kernel", 0400, dir, NULL, - &ptdump_fops); - if (!pe_knl) - goto err; - pe_curknl = debugfs_create_file("current_kernel", 0400, - dir, NULL, &ptdump_curknl_fops); - if (!pe_curknl) - goto err; + debugfs_create_file("kernel", 0400, dir, NULL, &ptdump_fops); + debugfs_create_file("current_kernel", 0400, dir, NULL, + &ptdump_curknl_fops); #ifdef CONFIG_PAGE_TABLE_ISOLATION - pe_curusr = debugfs_create_file("current_user", 0400, - dir, NULL, &ptdump_curusr_fops); - if (!pe_curusr) - goto err; + debugfs_create_file("current_user", 0400, dir, NULL, + &ptdump_curusr_fops); #endif - #if defined(CONFIG_EFI) && defined(CONFIG_X86_64) - pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops); - if (!pe_efi) - goto err; + debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops); #endif - return 0; -err: - debugfs_remove_recursive(dir); - return -ENOMEM; } static void __exit pt_dump_debug_exit(void) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 46df4c6aae46..794f364cb882 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -710,6 +710,10 @@ static void set_signal_archinfo(unsigned long address, * To avoid leaking information about the kernel page * table layout, pretend that user-mode accesses to * kernel addresses are always protection faults. + * + * NB: This means that failed vsyscalls with vsyscall=none + * will have the PROT bit. This doesn't leak any + * information and does not appear to cause any problems. */ if (address >= TASK_SIZE_MAX) error_code |= X86_PF_PROT; @@ -756,8 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, set_signal_archinfo(address, error_code); /* XXX: hwpoison faults will set the wrong code. */ - force_sig_fault(signal, si_code, (void __user *)address, - tsk); + force_sig_fault(signal, si_code, (void __user *)address); } /* @@ -918,7 +921,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (si_code == SEGV_PKUERR) force_sig_pkuerr((void __user *)address, pkey); - force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk); + force_sig_fault(SIGSEGV, si_code, (void __user *)address); return; } @@ -1015,8 +1018,6 @@ static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, vm_fault_t fault) { - struct task_struct *tsk = current; - /* Kernel mode? 
Handle exceptions or die: */ if (!(error_code & X86_PF_USER)) { no_context(regs, error_code, address, SIGBUS, BUS_ADRERR); @@ -1031,6 +1032,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, #ifdef CONFIG_MEMORY_FAILURE if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + struct task_struct *tsk = current; unsigned lsb = 0; pr_err( @@ -1040,11 +1042,11 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); if (fault & VM_FAULT_HWPOISON) lsb = PAGE_SHIFT; - force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, tsk); + force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb); return; } #endif - force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); } static noinline void @@ -1369,16 +1371,18 @@ void do_user_addr_fault(struct pt_regs *regs, #ifdef CONFIG_X86_64 /* - * Instruction fetch faults in the vsyscall page might need - * emulation. The vsyscall page is at a high address - * (>PAGE_OFFSET), but is considered to be part of the user - * address space. + * Faults in the vsyscall page might need emulation. The + * vsyscall page is at a high address (>PAGE_OFFSET), but is + * considered to be part of the user address space. * * The vsyscall page does not have a "real" VMA, so do this * emulation before we go searching for VMAs. + * + * PKRU never rejects instruction fetches, so we don't need + * to consider the PF_PK bit. */ - if ((hw_error_code & X86_PF_INSTR) && is_vsyscall_vaddr(address)) { - if (emulate_vsyscall(regs, address)) + if (is_vsyscall_vaddr(address)) { + if (emulate_vsyscall(hw_error_code, regs, address)) return; } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 693aaf28d5fe..0f01c7b1d217 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -671,23 +671,25 @@ static unsigned long __meminit phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, unsigned long page_size_mask, bool init) { - unsigned long paddr_next, paddr_last = paddr_end; - unsigned long vaddr = (unsigned long)__va(paddr); - int i = p4d_index(vaddr); + unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last; + + paddr_last = paddr_end; + vaddr = (unsigned long)__va(paddr); + vaddr_end = (unsigned long)__va(paddr_end); if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask, init); - for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { - p4d_t *p4d; + for (; vaddr < vaddr_end; vaddr = vaddr_next) { + p4d_t *p4d = p4d_page + p4d_index(vaddr); pud_t *pud; - vaddr = (unsigned long)__va(paddr); - p4d = p4d_page + p4d_index(vaddr); - paddr_next = (paddr & P4D_MASK) + P4D_SIZE; + vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE; + paddr = __pa(vaddr); if (paddr >= paddr_end) { + paddr_next = __pa(vaddr_next); if (!after_bootmem && !e820__mapped_any(paddr & P4D_MASK, paddr_next, E820_TYPE_RAM) && @@ -699,13 +701,13 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, if (!p4d_none(*p4d)) { pud = pud_offset(p4d, 0); - paddr_last = phys_pud_init(pud, paddr, paddr_end, - page_size_mask, init); + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), + page_size_mask, init); continue; } pud = alloc_low_page(); - paddr_last = phys_pud_init(pud, paddr, paddr_end, + paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end), page_size_mask, init); spin_lock(&init_mm.page_table_lock); 
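Aside on the signal-helper hunks above: force_sig_fault() and force_sig() lose their task_struct argument because these helpers now always act on current. A minimal sketch of the new calling convention, kept outside the diff and purely illustrative (the function name below is made up for the example):

#include <linux/sched/signal.h>	/* force_sig_fault() */

/* Illustrative only, not part of this patch: report a user-mode
 * access fault to the current task with the three-argument helper.
 */
static void report_user_fault_example(unsigned long address)
{
	/* was: force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)address, current); */
	force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)address);
}
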
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 4b6423e7bd21..e500f1df1140 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -28,9 +28,11 @@ #include "physaddr.h" -struct ioremap_mem_flags { - bool system_ram; - bool desc_other; +/* + * Descriptor controlling ioremap() behavior. + */ +struct ioremap_desc { + unsigned int flags; }; /* @@ -62,13 +64,14 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } -static bool __ioremap_check_ram(struct resource *res) +/* Does the range (or a subset of) contain normal RAM? */ +static unsigned int __ioremap_check_ram(struct resource *res) { unsigned long start_pfn, stop_pfn; unsigned long i; if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM) - return false; + return 0; start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT; stop_pfn = (res->end + 1) >> PAGE_SHIFT; @@ -76,28 +79,44 @@ static bool __ioremap_check_ram(struct resource *res) for (i = 0; i < (stop_pfn - start_pfn); ++i) if (pfn_valid(start_pfn + i) && !PageReserved(pfn_to_page(start_pfn + i))) - return true; + return IORES_MAP_SYSTEM_RAM; } - return false; + return 0; } -static int __ioremap_check_desc_other(struct resource *res) +/* + * In a SEV guest, NONE and RESERVED should not be mapped encrypted because + * there the whole memory is already encrypted. + */ +static unsigned int __ioremap_check_encrypted(struct resource *res) { - return (res->desc != IORES_DESC_NONE); + if (!sev_active()) + return 0; + + switch (res->desc) { + case IORES_DESC_NONE: + case IORES_DESC_RESERVED: + break; + default: + return IORES_MAP_ENCRYPTED; + } + + return 0; } -static int __ioremap_res_check(struct resource *res, void *arg) +static int __ioremap_collect_map_flags(struct resource *res, void *arg) { - struct ioremap_mem_flags *flags = arg; + struct ioremap_desc *desc = arg; - if (!flags->system_ram) - flags->system_ram = __ioremap_check_ram(res); + if (!(desc->flags & IORES_MAP_SYSTEM_RAM)) + desc->flags |= __ioremap_check_ram(res); - if (!flags->desc_other) - flags->desc_other = __ioremap_check_desc_other(res); + if (!(desc->flags & IORES_MAP_ENCRYPTED)) + desc->flags |= __ioremap_check_encrypted(res); - return flags->system_ram && flags->desc_other; + return ((desc->flags & (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)) == + (IORES_MAP_SYSTEM_RAM | IORES_MAP_ENCRYPTED)); } /* @@ -106,15 +125,15 @@ static int __ioremap_res_check(struct resource *res, void *arg) * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). */ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, - struct ioremap_mem_flags *flags) + struct ioremap_desc *desc) { u64 start, end; start = (u64)addr; end = start + size - 1; - memset(flags, 0, sizeof(*flags)); + memset(desc, 0, sizeof(struct ioremap_desc)); - walk_mem_res(start, end, flags, __ioremap_res_check); + walk_mem_res(start, end, desc, __ioremap_collect_map_flags); } /* @@ -131,15 +150,15 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, * have to convert them into an offset in a page-aligned mapping, but the * caller shouldn't need to know that small detail. 
*/ -static void __iomem *__ioremap_caller(resource_size_t phys_addr, - unsigned long size, enum page_cache_mode pcm, - void *caller, bool encrypted) +static void __iomem * +__ioremap_caller(resource_size_t phys_addr, unsigned long size, + enum page_cache_mode pcm, void *caller, bool encrypted) { unsigned long offset, vaddr; resource_size_t last_addr; const resource_size_t unaligned_phys_addr = phys_addr; const unsigned long unaligned_size = size; - struct ioremap_mem_flags mem_flags; + struct ioremap_desc io_desc; struct vm_struct *area; enum page_cache_mode new_pcm; pgprot_t prot; @@ -158,12 +177,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } - __ioremap_check_mem(phys_addr, size, &mem_flags); + __ioremap_check_mem(phys_addr, size, &io_desc); /* * Don't allow anybody to remap normal RAM that we're using.. */ - if (mem_flags.system_ram) { + if (io_desc.flags & IORES_MAP_SYSTEM_RAM) { WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", &phys_addr, &last_addr); return NULL; @@ -201,7 +220,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, * resulting mapping. */ prot = PAGE_KERNEL_IO; - if ((sev_active() && mem_flags.desc_other) || encrypted) + if ((io_desc.flags & IORES_MAP_ENCRYPTED) || encrypted) prot = pgprot_encrypted(prot); switch (pcm) { diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index dddcd2a1afdb..e2b0e2ac07bb 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -70,6 +70,19 @@ struct sme_populate_pgd_data { unsigned long vaddr_end; }; +/* + * This work area lives in the .init.scratch section, which lives outside of + * the kernel proper. It is sized to hold the intermediate copy buffer and + * more than enough pagetable pages. + * + * By using this section, the kernel can be encrypted in place and it + * avoids any possibility of boot parameters or initramfs images being + * placed such that the in-place encryption logic overwrites them. This + * section is 2MB aligned to allow for simple pagetable setup using only + * PMD entries (see vmlinux.lds.S). + */ +static char sme_workarea[2 * PMD_PAGE_SIZE] __section(.init.scratch); + static char sme_cmdline_arg[] __initdata = "mem_encrypt"; static char sme_cmdline_on[] __initdata = "on"; static char sme_cmdline_off[] __initdata = "off"; @@ -311,8 +324,13 @@ void __init sme_encrypt_kernel(struct boot_params *bp) } #endif - /* Set the encryption workarea to be immediately after the kernel */ - workarea_start = kernel_end; + /* + * We're running identity mapped, so we must obtain the address to the + * SME encryption workarea using rip-relative addressing. + */ + asm ("lea sme_workarea(%%rip), %0" + : "=r" (workarea_start) + : "p" (sme_workarea)); /* * Calculate required number of workarea bytes needed: diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 0d1c47cbbdd6..895fb7a9294d 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -912,7 +912,7 @@ void mpx_notify_unmap(struct mm_struct *mm, unsigned long start, ret = mpx_unmap_tables(mm, start, end); if (ret) - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); } /* MPX cannot handle addresses above 47 bits yet. 
*/ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 1f67b1e15bf6..44816ff6411f 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -13,33 +13,17 @@ phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; EXPORT_SYMBOL(physical_mask); #endif -#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) - #ifdef CONFIG_HIGHPTE -#define PGALLOC_USER_GFP __GFP_HIGHMEM +#define PGTABLE_HIGHMEM __GFP_HIGHMEM #else -#define PGALLOC_USER_GFP 0 +#define PGTABLE_HIGHMEM 0 #endif -gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; - -pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT); -} +gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM; pgtable_t pte_alloc_one(struct mm_struct *mm) { - struct page *pte; - - pte = alloc_pages(__userpte_alloc_gfp, 0); - if (!pte) - return NULL; - if (!pgtable_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; + return __pte_alloc_one(mm, __userpte_alloc_gfp); } static int __init setup_userpte(char *arg) @@ -235,7 +219,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count) { int i; bool failed = false; - gfp_t gfp = PGALLOC_GFP; + gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp &= ~__GFP_ACCOUNT; @@ -399,14 +383,14 @@ static inline pgd_t *_pgd_alloc(void) * We allocate one page for pgd. */ if (!SHARED_KERNEL_PMD) - return (pgd_t *)__get_free_pages(PGALLOC_GFP, + return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, PGD_ALLOCATION_ORDER); /* * Now PAE kernel is not running as a Xen domain. We can allocate * a 32-byte slab for pgd to save memory space. */ - return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); + return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER); } static inline void _pgd_free(pgd_t *pgd) @@ -424,7 +408,8 @@ void __init pgd_cache_init(void) static inline pgd_t *_pgd_alloc(void) { - return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER); + return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, + PGD_ALLOCATION_ORDER); } static inline void _pgd_free(pgd_t *pgd) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 91f6db92554c..4de9704c4aaf 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -712,7 +712,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, } /* - * See Documentation/x86/tlb.txt for details. We choose 33 + * See Documentation/x86/tlb.rst for details. We choose 33 * because it is large enough to cover the vast majority (at * least 95%) of allocations, and is small enough that we are * confident it will not cause too much overhead. 
Each single diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index b29e82f190c7..393d251798c0 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -253,13 +253,14 @@ static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk, /* dst = src */ static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[], const u8 src[], bool dstk, - bool sstk, u8 **pprog) + bool sstk, u8 **pprog, + const struct bpf_prog_aux *aux) { emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog); if (is64) /* complete 8 byte move */ emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog); - else + else if (!aux->verifier_zext) /* zero out high 4 bytes */ emit_ia32_mov_i(dst_hi, 0, dstk, pprog); } @@ -313,7 +314,8 @@ static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk, } static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, - bool dstk, u8 **pprog) + bool dstk, u8 **pprog, + const struct bpf_prog_aux *aux) { u8 *prog = *pprog; int cnt = 0; @@ -334,12 +336,14 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, */ EMIT2(0x0F, 0xB7); EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); + if (!aux->verifier_zext) + /* xor dreg_hi,dreg_hi */ + EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); break; case 32: - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); + if (!aux->verifier_zext) + /* xor dreg_hi,dreg_hi */ + EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); break; case 64: /* nop */ @@ -358,7 +362,8 @@ static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val, } static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val, - bool dstk, u8 **pprog) + bool dstk, u8 **pprog, + const struct bpf_prog_aux *aux) { u8 *prog = *pprog; int cnt = 0; @@ -380,16 +385,18 @@ static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val, EMIT2(0x0F, 0xB7); EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); + if (!aux->verifier_zext) + /* xor dreg_hi,dreg_hi */ + EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); break; case 32: /* Emit 'bswap eax' to swap lower 4 bytes */ EMIT1(0x0F); EMIT1(add_1reg(0xC8, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); + if (!aux->verifier_zext) + /* xor dreg_hi,dreg_hi */ + EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); break; case 64: /* Emit 'bswap eax' to swap lower 4 bytes */ @@ -569,7 +576,7 @@ static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op, static inline void emit_ia32_alu_r64(const bool is64, const u8 op, const u8 dst[], const u8 src[], bool dstk, bool sstk, - u8 **pprog) + u8 **pprog, const struct bpf_prog_aux *aux) { u8 *prog = *pprog; @@ -577,7 +584,7 @@ static inline void emit_ia32_alu_r64(const bool is64, const u8 op, if (is64) emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk, &prog); - else + else if (!aux->verifier_zext) emit_ia32_mov_i(dst_hi, 0, dstk, &prog); *pprog = prog; } @@ -668,7 +675,8 @@ static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op, /* ALU operation (64 bit) */ static inline void emit_ia32_alu_i64(const bool is64, const u8 op, const u8 dst[], const u32 val, - bool dstk, u8 **pprog) + bool dstk, u8 **pprog, + const struct bpf_prog_aux *aux) { u8 *prog = *pprog; u32 hi = 0; @@ -679,7 +687,7 @@ static inline void emit_ia32_alu_i64(const bool is64, const u8 op, emit_ia32_alu_i(is64, false, op, dst_lo, 
val, dstk, &prog); if (is64) emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog); - else + else if (!aux->verifier_zext) emit_ia32_mov_i(dst_hi, 0, dstk, &prog); *pprog = prog; @@ -724,9 +732,6 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], { u8 *prog = *pprog; int cnt = 0; - static int jmp_label1 = -1; - static int jmp_label2 = -1; - static int jmp_label3 = -1; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; @@ -745,78 +750,22 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], /* mov ecx,src_lo */ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); - /* cmp ecx,32 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); - /* Jumps when >= 32 */ - if (is_imm8(jmp_label(jmp_label1, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); - - /* < 32 */ - /* shl dreg_hi,cl */ - EMIT2(0xD3, add_1reg(0xE0, dreg_hi)); - /* mov ebx,dreg_lo */ - EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); + /* shld dreg_hi,dreg_lo,cl */ + EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo)); /* shl dreg_lo,cl */ EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); - /* IA32_ECX = -IA32_ECX + 32 */ - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); + /* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */ - /* shr ebx,cl */ - EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); - /* or dreg_hi,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 32 */ - if (jmp_label1 == -1) - jmp_label1 = cnt; - - /* cmp ecx,64 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); - /* Jumps when >= 64 */ - if (is_imm8(jmp_label(jmp_label2, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); + /* cmp ecx,32 */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); + /* skip the next two instructions (4 bytes) when < 32 */ + EMIT2(IA32_JB, 4); - /* >= 32 && < 64 */ - /* sub ecx,32 */ - EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); - /* shl dreg_lo,cl */ - EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); /* mov dreg_hi,dreg_lo */ EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); - - /* xor dreg_lo,dreg_lo */ - EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 64 */ - if (jmp_label2 == -1) - jmp_label2 = cnt; /* xor dreg_lo,dreg_lo */ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); - - if (jmp_label3 == -1) - jmp_label3 = cnt; if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ @@ -836,9 +785,6 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], { u8 *prog = *pprog; int cnt = 0; - static int jmp_label1 = -1; - static int jmp_label2 = -1; - static int jmp_label3 = -1; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? 
IA32_EDX : dst_hi; @@ -857,79 +803,23 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], /* mov ecx,src_lo */ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); - /* cmp ecx,32 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); - /* Jumps when >= 32 */ - if (is_imm8(jmp_label(jmp_label1, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); - - /* < 32 */ - /* lshr dreg_lo,cl */ - EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); - /* ashr dreg_hi,cl */ + /* shrd dreg_lo,dreg_hi,cl */ + EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi)); + /* sar dreg_hi,cl */ EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); - /* IA32_ECX = -IA32_ECX + 32 */ - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 32 */ - if (jmp_label1 == -1) - jmp_label1 = cnt; + /* if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi depending on sign */ - /* cmp ecx,64 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); - /* Jumps when >= 64 */ - if (is_imm8(jmp_label(jmp_label2, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); + /* cmp ecx,32 */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); + /* skip the next two instructions (5 bytes) when < 32 */ + EMIT2(IA32_JB, 5); - /* >= 32 && < 64 */ - /* sub ecx,32 */ - EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); - /* ashr dreg_hi,cl */ - EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); /* mov dreg_lo,dreg_hi */ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); - - /* ashr dreg_hi,imm8 */ + /* sar dreg_hi,31 */ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 64 */ - if (jmp_label2 == -1) - jmp_label2 = cnt; - /* ashr dreg_hi,imm8 */ - EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); - /* mov dreg_lo,dreg_hi */ - EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); - - if (jmp_label3 == -1) - jmp_label3 = cnt; - if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), @@ -948,9 +838,6 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, { u8 *prog = *pprog; int cnt = 0; - static int jmp_label1 = -1; - static int jmp_label2 = -1; - static int jmp_label3 = -1; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? 
IA32_EDX : dst_hi; @@ -969,77 +856,23 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, /* mov ecx,src_lo */ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); - /* cmp ecx,32 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); - /* Jumps when >= 32 */ - if (is_imm8(jmp_label(jmp_label1, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); - - /* < 32 */ - /* lshr dreg_lo,cl */ - EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); + /* shrd dreg_lo,dreg_hi,cl */ + EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi)); /* shr dreg_hi,cl */ EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); - /* IA32_ECX = -IA32_ECX + 32 */ - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); + /* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */ - /* >= 32 */ - if (jmp_label1 == -1) - jmp_label1 = cnt; - /* cmp ecx,64 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); - /* Jumps when >= 64 */ - if (is_imm8(jmp_label(jmp_label2, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); + /* cmp ecx,32 */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); + /* skip the next two instructions (4 bytes) when < 32 */ + EMIT2(IA32_JB, 4); - /* >= 32 && < 64 */ - /* sub ecx,32 */ - EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); - /* shr dreg_hi,cl */ - EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); /* mov dreg_lo,dreg_hi */ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); /* xor dreg_hi,dreg_hi */ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 64 */ - if (jmp_label2 == -1) - jmp_label2 = cnt; - /* xor dreg_lo,dreg_lo */ - EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); - - if (jmp_label3 == -1) - jmp_label3 = cnt; - if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), @@ -1069,27 +902,10 @@ static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val, } /* Do LSH operation */ if (val < 32) { - /* shl dreg_hi,imm8 */ - EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val); - /* mov ebx,dreg_lo */ - EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); + /* shld dreg_hi,dreg_lo,imm8 */ + EMIT4(0x0F, 0xA4, add_2reg(0xC0, dreg_hi, dreg_lo), val); /* shl dreg_lo,imm8 */ EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val); - - /* IA32_ECX = 32 - val */ - /* mov ecx,val */ - EMIT2(0xB1, val); - /* movzx ecx,ecx */ - EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shr ebx,cl */ - EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); - /* or dreg_hi,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); } else if (val >= 32 && val < 64) { u32 value = val - 32; @@ -1135,27 +951,10 @@ static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val, /* Do RSH operation */ if (val < 32) { - /* shr dreg_lo,imm8 */ - 
EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); + /* shrd dreg_lo,dreg_hi,imm8 */ + EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val); /* shr dreg_hi,imm8 */ EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val); - - /* IA32_ECX = 32 - val */ - /* mov ecx,val */ - EMIT2(0xB1, val); - /* movzx ecx,ecx */ - EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); } else if (val >= 32 && val < 64) { u32 value = val - 32; @@ -1200,27 +999,10 @@ static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val, } /* Do RSH operation */ if (val < 32) { - /* shr dreg_lo,imm8 */ - EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); + /* shrd dreg_lo,dreg_hi,imm8 */ + EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val); /* ashr dreg_hi,imm8 */ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val); - - /* IA32_ECX = 32 - val */ - /* mov ecx,val */ - EMIT2(0xB1, val); - /* movzx ecx,ecx */ - EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); } else if (val >= 32 && val < 64) { u32 value = val - 32; @@ -1713,8 +1495,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_MOV | BPF_X: switch (BPF_SRC(code)) { case BPF_X: - emit_ia32_mov_r64(is64, dst, src, dstk, - sstk, &prog); + if (imm32 == 1) { + /* Special mov32 for zext. 
*/ + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + break; + } + emit_ia32_mov_r64(is64, dst, src, dstk, sstk, + &prog, bpf_prog->aux); break; case BPF_K: /* Sign-extend immediate value to dst reg */ @@ -1754,11 +1541,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, switch (BPF_SRC(code)) { case BPF_X: emit_ia32_alu_r64(is64, BPF_OP(code), dst, - src, dstk, sstk, &prog); + src, dstk, sstk, &prog, + bpf_prog->aux); break; case BPF_K: emit_ia32_alu_i64(is64, BPF_OP(code), dst, - imm32, dstk, &prog); + imm32, dstk, &prog, + bpf_prog->aux); break; } break; @@ -1777,7 +1566,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, false, &prog); break; } - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU | BPF_RSH | BPF_X: @@ -1797,7 +1587,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, &prog); break; } - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; /* dst = dst / src(imm) */ /* dst = dst % src(imm) */ @@ -1819,7 +1610,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, &prog); break; } - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; case BPF_ALU64 | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_X: @@ -1836,7 +1628,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk, false, &prog); - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; /* dst = dst << imm */ case BPF_ALU64 | BPF_LSH | BPF_K: @@ -1872,7 +1665,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_NEG: emit_ia32_alu_i(is64, false, BPF_OP(code), dst_lo, 0, dstk, &prog); - emit_ia32_mov_i(dst_hi, 0, dstk, &prog); + if (!bpf_prog->aux->verifier_zext) + emit_ia32_mov_i(dst_hi, 0, dstk, &prog); break; /* dst = ~dst (64 bit) */ case BPF_ALU64 | BPF_NEG: @@ -1892,11 +1686,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; /* dst = htole(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: - emit_ia32_to_le_r64(dst, imm32, dstk, &prog); + emit_ia32_to_le_r64(dst, imm32, dstk, &prog, + bpf_prog->aux); break; /* dst = htobe(dst) */ case BPF_ALU | BPF_END | BPF_FROM_BE: - emit_ia32_to_be_r64(dst, imm32, dstk, &prog); + emit_ia32_to_be_r64(dst, imm32, dstk, &prog, + bpf_prog->aux); break; /* dst = imm64 */ case BPF_LD | BPF_IMM | BPF_DW: { @@ -2051,6 +1847,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_B: case BPF_H: case BPF_W: + if (!bpf_prog->aux->verifier_zext) + break; if (dstk) { EMIT3(0xC7, add_1reg(0x40, IA32_EBP), STACK_VAR(dst_hi)); @@ -2475,6 +2273,11 @@ notyet: return proglen; } +bool bpf_jit_needs_zext(void) +{ + return true; +} + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index 17185d73d649..ee6b0780bea1 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -104,24 +104,12 @@ DEFINE_SHOW_ATTRIBUTE(punit_dev_state); static struct dentry *punit_dbg_file; -static 
int punit_dbgfs_register(struct punit_device *punit_device) +static void punit_dbgfs_register(struct punit_device *punit_device) { - struct dentry *dev_state; - punit_dbg_file = debugfs_create_dir("punit_atom", NULL); - if (!punit_dbg_file) - return -ENXIO; - - dev_state = debugfs_create_file("dev_power_state", 0444, - punit_dbg_file, punit_device, - &punit_dev_state_fops); - if (!dev_state) { - pr_err("punit_dev_state register failed\n"); - debugfs_remove(punit_dbg_file); - return -ENXIO; - } - return 0; + debugfs_create_file("dev_power_state", 0444, punit_dbg_file, + punit_device, &punit_dev_state_fops); } static void punit_dbgfs_unregister(void) @@ -145,15 +133,12 @@ MODULE_DEVICE_TABLE(x86cpu, intel_punit_cpu_ids); static int __init punit_atom_debug_init(void) { const struct x86_cpu_id *id; - int ret; id = x86_match_cpu(intel_punit_cpu_ids); if (!id) return -ENODEV; - ret = punit_dbgfs_register((struct punit_device *)id->driver_data); - if (ret < 0) - return ret; + punit_dbgfs_register((struct punit_device *)id->driver_data); return 0; } diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 632b83885867..3b9fd679cea9 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -728,7 +728,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr) * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so * page faulting on these addresses isn't expected. */ - if (phys_addr >= 0x0000 && phys_addr <= 0x0fff) + if (phys_addr <= 0x0fff) return; /* diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c index 8d4daca81eda..c33f744b5388 100644 --- a/arch/x86/platform/geode/alix.c +++ b/arch/x86/platform/geode/alix.c @@ -20,7 +20,6 @@ #include <linux/moduleparam.h> #include <linux/leds.h> #include <linux/platform_device.h> -#include <linux/gpio.h> #include <linux/input.h> #include <linux/gpio_keys.h> #include <linux/dmi.h> diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c index 136974ec9a90..73a3f49b4eb6 100644 --- a/arch/x86/platform/geode/geos.c +++ b/arch/x86/platform/geode/geos.c @@ -18,7 +18,6 @@ #include <linux/string.h> #include <linux/leds.h> #include <linux/platform_device.h> -#include <linux/gpio.h> #include <linux/input.h> #include <linux/gpio_keys.h> #include <linux/dmi.h> diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index 2c24d8d30436..163e1b545517 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c @@ -18,7 +18,6 @@ #include <linux/string.h> #include <linux/leds.h> #include <linux/platform_device.h> -#include <linux/gpio.h> #include <linux/input.h> #include <linux/gpio_keys.h> diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index b5420371d32d..6dd25dc5f027 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c @@ -35,7 +35,6 @@ #include <linux/types.h> struct imr_device { - struct dentry *file; bool init; struct mutex lock; int max_imr; @@ -231,13 +230,11 @@ DEFINE_SHOW_ATTRIBUTE(imr_dbgfs_state); * imr_debugfs_register - register debugfs hooks. * * @idev: pointer to imr_device structure. - * @return: 0 on success - errno on failure. 
*/ -static int imr_debugfs_register(struct imr_device *idev) +static void imr_debugfs_register(struct imr_device *idev) { - idev->file = debugfs_create_file("imr_state", 0444, NULL, idev, - &imr_dbgfs_state_fops); - return PTR_ERR_OR_ZERO(idev->file); + debugfs_create_file("imr_state", 0444, NULL, idev, + &imr_dbgfs_state_fops); } /** @@ -582,7 +579,6 @@ static const struct x86_cpu_id imr_ids[] __initconst = { static int __init imr_init(void) { struct imr_device *idev = &imr_dev; - int ret; if (!x86_match_cpu(imr_ids) || !iosf_mbi_available()) return -ENODEV; @@ -592,9 +588,7 @@ static int __init imr_init(void) idev->init = true; mutex_init(&idev->lock); - ret = imr_debugfs_register(idev); - if (ret != 0) - pr_warn("debugfs register failed!\n"); + imr_debugfs_register(idev); imr_fixup_memmap(idev); return 0; } diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c index b393eaa798ef..2e796b54cbde 100644 --- a/arch/x86/platform/intel/iosf_mbi.c +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -461,31 +461,16 @@ static struct dentry *iosf_dbg; static void iosf_sideband_debug_init(void) { - struct dentry *d; - iosf_dbg = debugfs_create_dir("iosf_sb", NULL); - if (IS_ERR_OR_NULL(iosf_dbg)) - return; /* mdr */ - d = debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr); - if (!d) - goto cleanup; + debugfs_create_x32("mdr", 0660, iosf_dbg, &dbg_mdr); /* mcrx */ - d = debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx); - if (!d) - goto cleanup; + debugfs_create_x32("mcrx", 0660, iosf_dbg, &dbg_mcrx); /* mcr - initiates mailbox tranaction */ - d = debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops); - if (!d) - goto cleanup; - - return; - -cleanup: - debugfs_remove_recursive(d); + debugfs_create_file("mcr", 0660, iosf_dbg, &dbg_mcr, &iosf_mcr_fops); } static void iosf_debugfs_init(void) diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index c85d485eb4f8..ee2beda590d0 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -26,9 +26,6 @@ struct olpc_platform_t olpc_platform_info; EXPORT_SYMBOL_GPL(olpc_platform_info); -/* EC event mask to be applied during suspend (defining wakeup sources). */ -static u16 ec_wakeup_mask; - /* what the timeout *should* be (in ms) */ #define EC_BASE_TIMEOUT 20 @@ -182,83 +179,6 @@ err: return ret; } -void olpc_ec_wakeup_set(u16 value) -{ - ec_wakeup_mask |= value; -} -EXPORT_SYMBOL_GPL(olpc_ec_wakeup_set); - -void olpc_ec_wakeup_clear(u16 value) -{ - ec_wakeup_mask &= ~value; -} -EXPORT_SYMBOL_GPL(olpc_ec_wakeup_clear); - -/* - * Returns true if the compile and runtime configurations allow for EC events - * to wake the system. 
- */ -bool olpc_ec_wakeup_available(void) -{ - if (!machine_is_olpc()) - return false; - - /* - * XO-1 EC wakeups are available when olpc-xo1-sci driver is - * compiled in - */ -#ifdef CONFIG_OLPC_XO1_SCI - if (olpc_platform_info.boardrev < olpc_board_pre(0xd0)) /* XO-1 */ - return true; -#endif - - /* - * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is - * compiled in - */ -#ifdef CONFIG_OLPC_XO15_SCI - if (olpc_platform_info.boardrev >= olpc_board_pre(0xd0)) /* XO-1.5 */ - return true; -#endif - - return false; -} -EXPORT_SYMBOL_GPL(olpc_ec_wakeup_available); - -int olpc_ec_mask_write(u16 bits) -{ - if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) { - __be16 ec_word = cpu_to_be16(bits); - return olpc_ec_cmd(EC_WRITE_EXT_SCI_MASK, (void *) &ec_word, 2, - NULL, 0); - } else { - unsigned char ec_byte = bits & 0xff; - return olpc_ec_cmd(EC_WRITE_SCI_MASK, &ec_byte, 1, NULL, 0); - } -} -EXPORT_SYMBOL_GPL(olpc_ec_mask_write); - -int olpc_ec_sci_query(u16 *sci_value) -{ - int ret; - - if (olpc_platform_info.flags & OLPC_F_EC_WIDE_SCI) { - __be16 ec_word; - ret = olpc_ec_cmd(EC_EXT_SCI_QUERY, - NULL, 0, (void *) &ec_word, 2); - if (ret == 0) - *sci_value = be16_to_cpu(ec_word); - } else { - unsigned char ec_byte; - ret = olpc_ec_cmd(EC_SCI_QUERY, NULL, 0, &ec_byte, 1); - if (ret == 0) - *sci_value = ec_byte; - } - - return ret; -} -EXPORT_SYMBOL_GPL(olpc_ec_sci_query); - static bool __init check_ofw_architecture(struct device_node *root) { const char *olpc_arch; @@ -292,6 +212,10 @@ static bool __init platform_detect(void) if (success) { olpc_platform_info.boardrev = get_board_revision(root); olpc_platform_info.flags |= OLPC_F_PRESENT; + + pr_info("OLPC board revision %s%X\n", + ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", + olpc_platform_info.boardrev >> 4); } of_node_put(root); @@ -311,27 +235,8 @@ static int __init add_xo1_platform_devices(void) return PTR_ERR_OR_ZERO(pdev); } -static int olpc_xo1_ec_probe(struct platform_device *pdev) -{ - /* get the EC revision */ - olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, - (unsigned char *) &olpc_platform_info.ecver, 1); - - /* EC version 0x5f adds support for wide SCI mask */ - if (olpc_platform_info.ecver >= 0x5f) - olpc_platform_info.flags |= OLPC_F_EC_WIDE_SCI; - - pr_info("OLPC board revision %s%X (EC=%x)\n", - ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", - olpc_platform_info.boardrev >> 4, - olpc_platform_info.ecver); - - return 0; -} static int olpc_xo1_ec_suspend(struct platform_device *pdev) { - olpc_ec_mask_write(ec_wakeup_mask); - /* * Squelch SCIs while suspended. This is a fix for * <http://dev.laptop.org/ticket/1835>. 
@@ -355,15 +260,27 @@ static int olpc_xo1_ec_resume(struct platform_device *pdev) } static struct olpc_ec_driver ec_xo1_driver = { - .probe = olpc_xo1_ec_probe, .suspend = olpc_xo1_ec_suspend, .resume = olpc_xo1_ec_resume, .ec_cmd = olpc_xo1_ec_cmd, +#ifdef CONFIG_OLPC_XO1_SCI + /* + * XO-1 EC wakeups are available when olpc-xo1-sci driver is + * compiled in + */ + .wakeup_available = true, +#endif }; static struct olpc_ec_driver ec_xo1_5_driver = { - .probe = olpc_xo1_ec_probe, .ec_cmd = olpc_xo1_ec_cmd, +#ifdef CONFIG_OLPC_XO1_5_SCI + /* + * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is + * compiled in + */ + .wakeup_available = true, +#endif }; static int __init olpc_init(void) diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c index c78bfc16a3ca..26d1f6693789 100644 --- a/arch/x86/platform/olpc/olpc_dt.c +++ b/arch/x86/platform/olpc/olpc_dt.c @@ -216,7 +216,7 @@ static u32 __init olpc_dt_get_board_revision(void) return be32_to_cpu(rev); } -int olpc_dt_compatible_match(phandle node, const char *compat) +static int __init olpc_dt_compatible_match(phandle node, const char *compat) { char buf[64], *p; int plen, len; diff --git a/arch/x86/platform/pvh/enlighten.c b/arch/x86/platform/pvh/enlighten.c index 1861a2ba0f2b..c0a502f7e3a7 100644 --- a/arch/x86/platform/pvh/enlighten.c +++ b/arch/x86/platform/pvh/enlighten.c @@ -86,7 +86,7 @@ static void __init init_pvh_bootparams(bool xen_guest) } /* - * See Documentation/x86/boot.txt. + * See Documentation/x86/boot.rst. * * Version 2.12 supports Xen entry point but we will use default x86/PC * environment (i.e. hardware_subarch 0). diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 0c7dfec4acac..20c389a91b80 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -66,7 +66,6 @@ static struct tunables tunables[] = { }; static struct dentry *tunables_dir; -static struct dentry *tunables_file; /* these correspond to the statistics printed by ptc_seq_show() */ static char *stat_description[] = { @@ -1700,18 +1699,8 @@ static int __init uv_ptc_init(void) } tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); - if (!tunables_dir) { - pr_err("unable to create debugfs directory %s\n", - UV_BAU_TUNABLES_DIR); - return -EINVAL; - } - tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, - tunables_dir, NULL, &tunables_fops); - if (!tunables_file) { - pr_err("unable to create debugfs file %s\n", - UV_BAU_TUNABLES_FILE); - return -EINVAL; - } + debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, tunables_dir, NULL, + &tunables_fops); return 0; } diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig index a9c3db125222..9ad6842de4b4 100644 --- a/arch/x86/ras/Kconfig +++ b/arch/x86/ras/Kconfig @@ -11,3 +11,13 @@ config RAS_CEC Bear in mind that this is absolutely useless if your platform doesn't have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS. + +config RAS_CEC_DEBUG + bool "CEC debugging machinery" + default n + depends on RAS_CEC + help + Add extra files to (debugfs)/ras/cec to test the correctable error + collector feature. "pfn" is a writable file that allows user to + simulate an error in a particular page frame. "array" is a read-only + file that dumps out the current state of all pages logged so far. 
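The debugfs conversions above (punit_atom_debug.c, imr.c, iosf_mbi.c, tlb_uv.c) all follow the same pattern: return values of debugfs_create_dir()/debugfs_create_file() are no longer checked, and the registration helpers become void. A rough sketch of the resulting shape, assuming kernel context; the my_* names are invented and not taken from these patches:

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *my_dir;

static int my_state_show(struct seq_file *s, void *unused)
{
        seq_puts(s, "ok\n");
        return 0;
}
DEFINE_SHOW_ATTRIBUTE(my_state);

/* Registration no longer returns an error: debugfs failures are ignored. */
static void my_debugfs_register(void *data)
{
        my_dir = debugfs_create_dir("my_driver", NULL);
        debugfs_create_file("state", 0444, my_dir, data, &my_state_fops);
}

static void my_debugfs_unregister(void)
{
        debugfs_remove_recursive(my_dir);
}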
diff --git a/arch/x86/tools/insn_decoder_test.c b/arch/x86/tools/insn_decoder_test.c index e455349e0ab5..34eda63c124b 100644 --- a/arch/x86/tools/insn_decoder_test.c +++ b/arch/x86/tools/insn_decoder_test.c @@ -111,7 +111,7 @@ static void parse_args(int argc, char **argv) int main(int argc, char **argv) { char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; - unsigned char insn_buf[16]; + unsigned char insn_buff[16]; struct insn insn; int insns = 0; int warnings = 0; @@ -130,7 +130,7 @@ int main(int argc, char **argv) } insns++; - memset(insn_buf, 0, 16); + memset(insn_buff, 0, 16); strcpy(copy, line); tab1 = strchr(copy, '\t'); if (!tab1) @@ -143,13 +143,13 @@ int main(int argc, char **argv) *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ while (s < tab2) { if (sscanf(s, "%x", &b) == 1) { - insn_buf[nb++] = (unsigned char) b; + insn_buff[nb++] = (unsigned char) b; s += 3; } else break; } /* Decode an instruction */ - insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64); + insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64); insn_get_length(&insn); if (insn.length != nb) { warnings++; diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c index 14cf07916081..185ceba9d289 100644 --- a/arch/x86/tools/insn_sanity.c +++ b/arch/x86/tools/insn_sanity.c @@ -83,7 +83,7 @@ static void dump_insn(FILE *fp, struct insn *insn) } static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter, - unsigned char *insn_buf, struct insn *insn) + unsigned char *insn_buff, struct insn *insn) { int i; @@ -96,7 +96,7 @@ static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter, /* Input a decoded instruction sequence directly */ fprintf(fp, " $ echo "); for (i = 0; i < MAX_INSN_SIZE; i++) - fprintf(fp, " %02x", insn_buf[i]); + fprintf(fp, " %02x", insn_buff[i]); fprintf(fp, " | %s -i -\n", prog); if (!input_file) { @@ -124,7 +124,7 @@ fail: } /* Read given instruction sequence from the input file */ -static int read_next_insn(unsigned char *insn_buf) +static int read_next_insn(unsigned char *insn_buff) { char buf[256] = "", *tmp; int i; @@ -134,7 +134,7 @@ static int read_next_insn(unsigned char *insn_buf) return 0; for (i = 0; i < MAX_INSN_SIZE; i++) { - insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16); + insn_buff[i] = (unsigned char)strtoul(tmp, &tmp, 16); if (*tmp != ' ') break; } @@ -142,19 +142,19 @@ static int read_next_insn(unsigned char *insn_buf) return i; } -static int generate_insn(unsigned char *insn_buf) +static int generate_insn(unsigned char *insn_buff) { int i; if (input_file) - return read_next_insn(insn_buf); + return read_next_insn(insn_buff); /* Fills buffer with random binary up to MAX_INSN_SIZE */ for (i = 0; i < MAX_INSN_SIZE - 1; i += 2) - *(unsigned short *)(&insn_buf[i]) = random() & 0xffff; + *(unsigned short *)(&insn_buff[i]) = random() & 0xffff; while (i < MAX_INSN_SIZE) - insn_buf[i++] = random() & 0xff; + insn_buff[i++] = random() & 0xff; return i; } @@ -226,31 +226,31 @@ int main(int argc, char **argv) int insns = 0; int errors = 0; unsigned long i; - unsigned char insn_buf[MAX_INSN_SIZE * 2]; + unsigned char insn_buff[MAX_INSN_SIZE * 2]; parse_args(argc, argv); /* Prepare stop bytes with NOPs */ - memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE); + memset(insn_buff + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE); for (i = 0; i < iter_end; i++) { - if (generate_insn(insn_buf) <= 0) + if (generate_insn(insn_buff) <= 0) break; if (i < iter_start) /* Skip to given iteration number */ continue; /* Decode an instruction */ - 
insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64); + insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64); insn_get_length(&insn); if (insn.next_byte <= insn.kaddr || insn.kaddr + MAX_INSN_SIZE < insn.next_byte) { /* Access out-of-range memory */ - dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn); + dump_stream(stderr, "Error: Found an access violation", i, insn_buff, &insn); errors++; } else if (verbose && !insn_complete(&insn)) - dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn); + dump_stream(stdout, "Info: Found an undecodable input", i, insn_buff, &insn); else if (verbose >= 2) dump_insn(stdout, &insn); insns++; diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 8b4a71efe7ee..7c11c9e5d7ea 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -471,7 +471,7 @@ long sys_sigreturn(void) return PT_REGS_SYSCALL_RET(&current->thread.regs); segfault: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } @@ -577,6 +577,6 @@ long sys_rt_sigreturn(void) return PT_REGS_SYSCALL_RET(&current->thread.regs); segfault: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index e07abefd3d26..ba5a41828e9d 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -7,6 +7,7 @@ config XEN bool "Xen guest support" depends on PARAVIRT select PARAVIRT_CLOCK + select X86_HV_CALLBACK_VECTOR depends on X86_64 || (X86_32 && X86_PAE) depends on X86_LOCAL_APIC && X86_TSC help diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index 13da87918b4f..532410998684 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c @@ -9,13 +9,8 @@ static struct dentry *d_xen_debug; struct dentry * __init xen_init_debugfs(void) { - if (!d_xen_debug) { + if (!d_xen_debug) d_xen_debug = debugfs_create_dir("xen", NULL); - - if (!d_xen_debug) - pr_warning("Could not create 'xen' debugfs directory\n"); - } - return d_xen_debug; } diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index beb44e22afdf..f6e5eeecfc69 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2700,8 +2700,7 @@ struct remap_data { struct mmu_update *mmu_update; }; -static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token, - unsigned long addr, void *data) +static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data) { struct remap_data *rmd = data; pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot)); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 95ce9b5be411..0acba2c712ab 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -817,9 +817,6 @@ static int __init xen_p2m_debugfs(void) { struct dentry *d_xen = xen_init_debugfs(); - if (d_xen == NULL) - return -ENOMEM; - d_mmu_debug = debugfs_create_dir("mmu", d_xen); debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 590fcf863006..802ee5bba66c 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -58,6 +58,7 @@ static void cpu_bringup(void) { int cpu; + cr4_init(); cpu_init(); touch_softlockup_watchdog(); preempt_disable(); @@ -251,6 +252,7 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); }
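The remap_area_pfn_pte_fn() change in the mmu_pv.c hunk above follows an interface cleanup in which apply_to_page_range() callbacks (pte_fn_t) lose the unused pgtable_t token parameter. A hedged sketch of a callback in the new three-argument form; my_count_present and my_count_range are invented names, not kernel APIs:

#include <linux/mm.h>

/* pte_fn_t is now: int (*)(pte_t *pte, unsigned long addr, void *data) */
static int my_count_present(pte_t *ptep, unsigned long addr, void *data)
{
        unsigned long *count = data;

        if (pte_present(*ptep))
                (*count)++;
        return 0;
}

static unsigned long my_count_range(unsigned long start, unsigned long size)
{
        unsigned long count = 0;

        apply_to_page_range(&init_mm, start, size, my_count_present, &count);
        return count;
}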
set_cpu_sibling_map(0); diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 6ec1b75eabc5..ebc135bda921 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -2,6 +2,7 @@ config XTENSA def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_BINFMT_FLAT if !MMU select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_NO_COHERENT_DMA_MMAP if !MMU diff --git a/arch/xtensa/include/asm/flat.h b/arch/xtensa/include/asm/flat.h index b8532d7877b3..ed5870c779f9 100644 --- a/arch/xtensa/include/asm/flat.h +++ b/arch/xtensa/include/asm/flat.h @@ -4,11 +4,8 @@ #include <asm/unaligned.h> -#define flat_argvp_envp_on_stack() 0 -#define flat_old_ram_flag(flags) (flags) -#define flat_reloc_valid(reloc, size) ((reloc) <= (size)) static inline int flat_get_addr_from_rp(u32 __user *rp, u32 relval, u32 flags, - u32 *addr, u32 *persistent) + u32 *addr) { *addr = get_unaligned((__force u32 *)rp); return 0; @@ -18,7 +15,5 @@ static inline int flat_put_addr_at_rp(u32 __user *rp, u32 addr, u32 rel) put_unaligned(addr, (__force u32 *)rp); return 0; } -#define flat_get_relocate_addr(rel) (rel) -#define flat_set_persistent(relval, p) 0 #endif /* __ASM_XTENSA_FLAT_H */ diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index 30af4dc3ce7b..b52236245e51 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -3,6 +3,7 @@ #define _XTENSA_UNISTD_H #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #include <uapi/asm/unistd.h> #define __ARCH_WANT_NEW_STAT diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index a87f8a308cc1..65f05776d827 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -163,10 +163,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = phys_to_dma(dev, page_to_phys(page)); - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { - return page; - } - #ifdef CONFIG_MMU if (PageHighMem(page)) { void *p; @@ -192,9 +188,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr, unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; struct page *page; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) { - page = vaddr; - } else if (platform_vaddr_uncached(vaddr)) { + if (platform_vaddr_uncached(vaddr)) { page = virt_to_page(platform_vaddr_to_cached(vaddr)); } else { #ifdef CONFIG_MMU diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index dc22a238ed9c..fbedf2aba09d 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -270,7 +270,7 @@ asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3, return ret; badframe: - force_sig(SIGSEGV, current); + force_sig(SIGSEGV); return 0; } diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 5fa0ee1c8e00..25f4de729a6d 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -404,3 +404,5 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 454d53096bc9..f060348c1b23 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -184,7 +184,7 @@ void do_unhandled(struct pt_regs *regs, unsigned long exccause) "\tEXCCAUSE is %ld\n", current->comm, task_pid_nr(current), regs->pc, exccause); - force_sig(SIGILL, current); + force_sig(SIGILL); } /* @@ 
-306,7 +306,7 @@ do_illegal_instruction(struct pt_regs *regs) pr_info_ratelimited("Illegal Instruction in '%s' (pid = %d, pc = %#010lx)\n", current->comm, task_pid_nr(current), regs->pc); - force_sig(SIGILL, current); + force_sig(SIGILL); } @@ -330,7 +330,7 @@ do_unaligned_user (struct pt_regs *regs) "(pid = %d, pc = %#010lx)\n", regs->excvaddr, current->comm, task_pid_nr(current), regs->pc); - force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr); } #endif @@ -354,7 +354,7 @@ do_debug(struct pt_regs *regs) /* If in user mode, send SIGTRAP signal to current process */ - force_sig(SIGTRAP, current); + force_sig(SIGTRAP); } diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 2ab0e0dcd166..f81b1478da61 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -157,7 +157,7 @@ bad_area: if (user_mode(regs)) { current->thread.bad_vaddr = address; current->thread.error_code = is_write; - force_sig_fault(SIGSEGV, code, (void *) address, current); + force_sig_fault(SIGSEGV, code, (void *) address); return; } bad_page_fault(regs, address, SIGSEGV); @@ -182,7 +182,7 @@ do_sigbus: * or user mode. */ current->thread.bad_vaddr = address; - force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) |
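The xtensa hunks above, like the x86 mpx and UML ones earlier in this section, convert signal delivery to the newer task-less helpers: force_sig() and force_sig_fault() no longer take an explicit task argument and always act on current. A short sketch of a fault path in the new style; my_report_user_fault is an invented name used only for illustration:

#include <linux/sched/signal.h>

static void my_report_user_fault(unsigned long address, bool is_alignment)
{
        if (is_alignment)
                /* new form: no task argument, implicitly targets current */
                force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address);
        else
                force_sig(SIGSEGV);
}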