diff options
Diffstat (limited to 'arch')
51 files changed, 262 insertions, 81 deletions
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index f05bdb4b1cb9..ff4049155c84 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -297,7 +297,9 @@ static inline void __iomem * ioremap_nocache(unsigned long offset, unsigned long size) { return ioremap(offset, size); -} +} + +#define ioremap_uc ioremap_nocache static inline void iounmap(volatile void __iomem *addr) { diff --git a/arch/alpha/lib/udelay.c b/arch/alpha/lib/udelay.c index 69d52aa37bae..f2d81ff38aa6 100644 --- a/arch/alpha/lib/udelay.c +++ b/arch/alpha/lib/udelay.c @@ -30,6 +30,7 @@ __delay(int loops) " bgt %0,1b" : "=&r" (tmp), "=r" (loops) : "1"(loops)); } +EXPORT_SYMBOL(__delay); #ifdef CONFIG_SMP #define LPJ cpu_data[smp_processor_id()].loops_per_jiffy diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7451b447cc2d..2c2b28ee4811 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -54,6 +54,14 @@ AS += -EL LD += -EL endif +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 7bbf325a4f31..b2bc8e11471d 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -491,11 +491,6 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm - .macro uaccess_save_and_disable, tmp - uaccess_save \tmp - uaccess_disable \tmp - .endm - .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index b274bde24905..e7335a92144e 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -40,6 +40,7 @@ do { \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ ".pushsection __bug_table,\"a\"\n" \ + ".align 2\n" \ "3:\t.word 1b, 2b\n" \ "\t.hword " #__line ", 0\n" \ ".popsection"); \ diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index e878129f2fee..fc8ba1663601 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ #include <asm/barrier.h> +#include <asm/thread_info.h> #endif /* @@ -89,7 +90,8 @@ static inline unsigned int get_domain(void) asm( "mrc p15, 0, %0, c3, c0 @ get domain" - : "=r" (domain)); + : "=r" (domain) + : "m" (current_thread_info()->cpu_domain)); return domain; } @@ -98,7 +100,7 @@ static inline void set_domain(unsigned val) { asm volatile( "mcr p15, 0, %0, c3, c0 @ set domain" - : : "r" (val)); + : : "r" (val) : "memory"); isb(); } diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index d0a1119dcaf3..776757d1604a 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -25,7 +25,6 @@ struct task_struct; #include <asm/types.h> -#include <asm/domain.h> typedef unsigned long mm_segment_t; diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index a3089bacb8d8..7a7c4cea5523 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -226,6 +226,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); +#ifdef CONFIG_CPU_USE_DOMAINS /* * Copy the initial value of the domain access control register * from the current thread: thread->addr_limit will have been @@ -233,6 +234,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, * kernel/fork.c */ thread->cpu_domain = get_domain(); +#endif if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index 71df43547659..39c20afad7ed 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -95,9 +95,10 @@ emulate: reteq r4 @ no, return failure next: + uaccess_enable r3 .Lx1: ldrt r6, [r5], #4 @ get the next instruction and @ increment PC - + uaccess_disable r3 and r2, r6, #0x0F000000 @ test for FP insns teq r2, #0x0C000000 teqne r2, #0x0D000000 diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index f00e08075938..10fd99c568c6 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -98,8 +98,23 @@ ENTRY(privcmd_call) mov r1, r2 mov r2, r3 ldr r3, [sp, #8] + /* + * Privcmd calls are issued by the userspace. We need to allow the + * kernel to access the userspace memory before issuing the hypercall. + */ + uaccess_enable r4 + + /* r4 is loaded now as we use it as scratch register before */ ldr r4, [sp, #4] __HVC(XEN_IMM) + + /* + * Disable userspace access from kernel. This is fine to do it + * unconditionally as no set_fs(KERNEL_DS)/set_fs(get_ds()) is + * called before. + */ + uaccess_disable r4 + ldm sp!, {r4} ret lr ENDPROC(privcmd_call); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7d95663c0160..07d1811aa03f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -32,6 +32,7 @@ config ARM64 select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP + select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL @@ -331,6 +332,22 @@ config ARM64_ERRATUM_845719 If unsure, say Y. +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. + + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 15ff5b4156fd..f9914d7c1bb0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -41,6 +41,10 @@ endif CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6900b2d95371..b0329be95cb1 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -26,13 +26,9 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) +#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) -#ifdef CONFIG_ARM64_HW_AFDBM -#define PTE_WRITE (PTE_DBM) /* same as DBM */ -#else -#define PTE_WRITE (_AT(pteval_t, 1) << 57) -#endif #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* @@ -146,7 +142,7 @@ extern struct page *empty_zero_page; #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #ifdef CONFIG_ARM64_HW_AFDBM -#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) #else #define pte_hw_dirty(pte) (0) #endif @@ -238,7 +234,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via * the page fault mechanism. Checking the dirty status of a pte becomes: * - * PTE_DIRTY || !PTE_RDONLY + * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) @@ -503,7 +499,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) - newprot |= PTE_DIRTY; + pte = pte_mkdirty(pte); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 9b3b62ac9c24..cebf78661a55 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -134,7 +134,7 @@ static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { int cpu = (unsigned long)data; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, clear_os_lock, NULL, 1); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a055be6125cf..90d09eddd5b2 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -523,6 +523,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 + /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index c97040ecf838..bba85c8f8037 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -872,7 +872,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, void *hcpu) { int cpu = (long)hcpu; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 67bf4107f6ef..876eb8df50bf 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 948f0ad2de23..71ef6dc89ae5 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -212,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. + * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU. */ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -235,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers. Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. */ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -263,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -273,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. */ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0bcc4bc94b4a..99224dcebdc5 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; - if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) { + if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) { struct page *page; void *addr; diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 95c39b95e97e..99c96a5e6016 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 319 /* length of syscall table */ +#define NR_syscalls 321 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 461079560c78..98e94e19a5a0 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -332,5 +332,7 @@ #define __NR_memfd_create 1340 #define __NR_bpf 1341 #define __NR_execveat 1342 +#define __NR_userfaultfd 1343 +#define __NR_membarrier 1344 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ae0de7bf5525..37cc7a65cd3e 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1768,5 +1768,7 @@ sys_call_table: data8 sys_memfd_create // 1340 data8 sys_bpf data8 sys_execveat + data8 sys_userfaultfd + data8 sys_membarrier .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 73eddda53b8e..4eec430d8fa8 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -28,6 +28,9 @@ BOOTCFLAGS += -m64 endif ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian +else +BOOTCFLAGS += -mlittle-endian +BOOTCFLAGS += $(call cc-option,-mabi=elfv2) endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 71f2b3f02cf8..4d65499ee1c1 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -368,3 +368,4 @@ SYSCALL_SPU(memfd_create) SYSCALL_SPU(bpf) COMPAT_SYS(execveat) PPC64ONLY(switch_endian) +SYSCALL_SPU(userfaultfd) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index f4f8b667d75b..4a055b6c2a64 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include <uapi/asm/unistd.h> -#define __NR_syscalls 364 +#define __NR_syscalls 365 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index e4aa173dae62..6ad58d4c879b 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -386,5 +386,6 @@ #define __NR_bpf 361 #define __NR_execveat 362 #define __NR_switch_endian 363 +#define __NR_userfaultfd 364 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index bb02e9f6944e..ad8c9db61237 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -38,6 +38,7 @@ #include <asm/udbg.h> #include <asm/mmu_context.h> #include <asm/epapr_hcalls.h> +#include <asm/code-patching.h> #define DBG(fmt...) @@ -109,6 +110,8 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * This is called very early on the boot process, after a minimal * MMU environment has been set up but before MMU_init is called. */ +extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ + notrace void __init machine_init(u64 dt_ptr) { lockdep_init(); @@ -116,6 +119,9 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); + patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP); + patch_instruction(&memset_nocache_branch, PPC_INST_NOP); + /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 2ef50c629470..c44df2dbedd5 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -73,6 +73,10 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination * area is cacheable. -- paulus + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore skip the optimised bloc that uses dcbz. This jump is + * replaced by a nop once cache is active. This is done in machine_init() */ _GLOBAL(memset) rlwimi r4,r4,8,16,23 @@ -88,6 +92,8 @@ _GLOBAL(memset) subf r6,r0,r6 cmplwi 0,r4,0 bne 2f /* Use normal procedure if r4 is not zero */ +_GLOBAL(memset_nocache_branch) + b 2f /* Skip optimised bloc until cache is enabled */ clrlwi r7,r6,32-LG_CACHELINE_BYTES add r8,r7,r5 @@ -128,6 +134,10 @@ _GLOBAL(memset) * the destination area is cacheable. * We only use this version if the source and dest don't overlap. * -- paulus. + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is + * replaced by a nop once cache is active. This is done in machine_init() */ _GLOBAL(memmove) cmplw 0,r3,r4 @@ -135,6 +145,7 @@ _GLOBAL(memmove) /* fall through */ _GLOBAL(memcpy) + b generic_memcpy add r7,r3,r5 /* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 43dafb9d6a46..4d87122cf6a7 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -85,7 +85,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, BUG_ON(index >= 4096); vpn = hpt_vpn(ea, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); if (psize == MMU_PAGE_4K) { /* @@ -101,6 +100,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, valid = hpte_valid(hpte_slot_array, index); if (valid) { /* update the hpte bits */ + hash = hpt_hash(vpn, shift, ssize); hidx = hpte_hash_index(hpte_slot_array, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; @@ -126,6 +126,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!valid) { unsigned long hpte_group; + hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index e66ef1943338..b304a9fe55cc 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -63,6 +63,7 @@ static struct irq_chip mpic_pasemi_msi_chip = { static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); @@ -70,10 +71,10 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK); } return; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2927cd5c8303..414fd1a00fda 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2049,9 +2049,23 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) struct iommu_table *tbl = NULL; long rc; + /* + * crashkernel= specifies the kdump kernel's maximum memory at + * some offset and there is no guaranteed the result is a power + * of 2, which will cause errors later. + */ + const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max()); + + /* + * In memory constrained environments, e.g. kdump kernel, the + * DMA window can be larger than available memory, which will + * cause errors later. + */ + const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory); + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, IOMMU_PAGE_SHIFT_4K, - pe->table_group.tce32_size, + window_size, POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 9b2480b265c0..f2dd77234240 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -99,6 +99,7 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; @@ -106,10 +107,10 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev) for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 47d9cebe7159..db17827eb746 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -422,8 +422,10 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); of_node_put(parent); - if (!dn) + if (!dn) { + dlpar_release_drc(drc_index); return -EINVAL; + } rc = dlpar_attach_node(dn); if (rc) { diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 5916da1856a7..48a576aa47b9 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -128,15 +128,16 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; struct fsl_msi *msi_data; + irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 70fbd5694a8b..2cbc7e29b85f 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -107,15 +107,16 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 24d0470c1698..8fb806135043 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -124,16 +124,17 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; + irq_hw_number_t hwirq; dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); for_each_pci_msi_entry(entry, dev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7aef2d52daa0..328c8352480c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1006,7 +1006,7 @@ config X86_THERMAL_VECTOR depends on X86_MCE_INTEL config X86_LEGACY_VM86 - bool "Legacy VM86 support (obsolete)" + bool "Legacy VM86 support" default n depends on X86_32 ---help--- @@ -1018,19 +1018,20 @@ config X86_LEGACY_VM86 available to accelerate real mode DOS programs. However, any recent version of DOSEMU, X, or vbetool should be fully functional even without kernel VM86 support, as they will all - fall back to (pretty well performing) software emulation. + fall back to software emulation. Nevertheless, if you are using + a 16-bit DOS program where 16-bit performance matters, vm86 + mode might be faster than emulation and you might want to + enable this option. - Anything that works on a 64-bit kernel is unlikely to need - this option, as 64-bit kernels don't, and can't, support V8086 - mode. This option is also unrelated to 16-bit protected mode - and is not needed to run most 16-bit programs under Wine. + Note that any app that works on a 64-bit kernel is unlikely to + need this option, as 64-bit kernels don't, and can't, support + V8086 mode. This option is also unrelated to 16-bit protected + mode and is not needed to run most 16-bit programs under Wine. - Enabling this option adds considerable attack surface to the - kernel and slows down system calls and exception handling. + Enabling this option increases the complexity of the kernel + and slows down exception handling a tiny bit. - Unless you use very old userspace or need the last drop of - performance in your real mode DOS games and can't use KVM, - say N here. + If unsure, say N here. config VM86 bool diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 477fc28050e4..e6cf2ad350d1 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -241,6 +241,7 @@ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index ce029e4fa7c6..31247b5bff7c 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -97,7 +97,6 @@ struct pv_lazy_ops { struct pv_time_ops { unsigned long long (*sched_clock)(void); unsigned long long (*steal_clock)(int cpu); - unsigned long (*get_tsc_khz)(void); }; struct pv_cpu_ops { diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9d51fae1cba3..eaba08076030 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -39,18 +39,27 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #endif -#define virt_queued_spin_lock virt_queued_spin_lock - -static inline bool virt_queued_spin_lock(struct qspinlock *lock) +#ifdef CONFIG_PARAVIRT +#define virt_spin_lock virt_spin_lock +static inline bool virt_spin_lock(struct qspinlock *lock) { if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0) - cpu_relax(); + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. + */ + + do { + while (atomic_read(&lock->val) != 0) + cpu_relax(); + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); return true; } +#endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c42827eb86cf..25f909362b7a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -338,10 +338,15 @@ done: static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { + unsigned long flags; + if (instr[0] != 0x90) return; + local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); + sync_core(); + local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3ca3e46aa405..24e94ce454e2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here. + */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 38a76f826530..5c60bb162622 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; const struct cpumask *mask; struct irq_data *idata; + struct irq_chip *chip; if (skip_ioapic_setup == 1) return; @@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - irq_set_affinity(irq, mask); + chip = irq_data_get_irq_chip(idata); + chip->irq_set_affinity(idata, mask, false); } - } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 07ce52c22ec8..de22ea7ff82f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1110,10 +1110,10 @@ void print_cpu_info(struct cpuinfo_x86 *c) else printk(KERN_CONT "%d86", c->x86); - printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); + printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); else printk(KERN_CONT ")\n"); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index cd9b6d0b10bf..3fefebfbdf4b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2316,9 +2316,12 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = cpuc->event_constraint[idx]; + struct event_constraint *c1 = NULL; struct event_constraint *c2; + if (idx >= 0) /* fake does < 0 */ + c1 = cpuc->event_constraint[idx]; + /* * first time only * - static constraint: no change across incremental scheduling calls diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index 54690e885759..d1c0f254afbe 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -222,6 +222,7 @@ static void __bts_event_start(struct perf_event *event) if (!buf || bts_buffer_is_full(buf, bts)) return; + event->hw.itrace_started = 1; event->hw.state = 0; if (!buf->snapshot) diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 2bcc0525f1c1..6acc9dd91f36 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -58,7 +58,7 @@ static struct ldt_struct *alloc_ldt_struct(int size) if (alloc_size > PAGE_SIZE) new_ldt->entries = vzalloc(alloc_size); else - new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); + new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); if (!new_ldt->entries) { kfree(new_ldt); @@ -95,7 +95,7 @@ static void free_ldt_struct(struct ldt_struct *ldt) if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(ldt->entries); else - kfree(ldt->entries); + free_page((unsigned long)ldt->entries); kfree(ldt); } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 84b8ef82a159..1b55de1267cf 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -131,8 +131,8 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { - *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); if (!*dev) *dev = &x86_dma_fallback_dev; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c8d52cb4cb6e..c3f7602cd038 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -21,6 +21,7 @@ #include <asm/hypervisor.h> #include <asm/nmi.h> #include <asm/x86_init.h> +#include <asm/geode.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1013,15 +1014,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index abd8b856bd2b..524619351961 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -45,6 +45,7 @@ #include <linux/audit.h> #include <linux/stddef.h> #include <linux/slab.h> +#include <linux/security.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -232,6 +233,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) struct pt_regs *regs = current_pt_regs(); unsigned long err = 0; + err = security_mmap_addr(0); + if (err) { + /* + * vm86 cannot virtualize the address space, so vm86 users + * need to manage the low 1MB themselves using mmap. Given + * that BIOS places important data in the first page, vm86 + * is essentially useless if mmap_min_addr != 0. DOSEMU, + * for example, won't even bother trying to use vm86 if it + * can't map a page at virtual address 0. + * + * To reduce the available kernel attack surface, simply + * disallow vm86(old) for users who cannot mmap at va 0. + * + * The implementation of security_mmap_addr will allow + * suitably privileged users to map va 0 even if + * vm.mmap_min_addr is set above 0, and we want this + * behavior for vm86 as well, as it ensures that legacy + * tools like vbetool will not fail just because of + * vm.mmap_min_addr. + */ + pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n", + current->comm, task_pid_nr(current), + from_kuid_munged(&init_user_ns, current_uid())); + return -EPERM; + } + if (!vm86) { if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) return -ENOMEM; diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 66338a60aa6e..c2aea63bee20 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -192,10 +192,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n", + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? " hotplug" : ""); + hotpluggable ? " hotplug" : "", + ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); /* Mark hotplug range in memblock. */ if (hotpluggable && memblock_mark_hotplug(start, ma->length)) |