diff options
Diffstat (limited to 'arch')
132 files changed, 958 insertions, 799 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 6c00e5b00f8b..f76b214cf7ad 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -867,4 +867,13 @@ config STRICT_MODULE_RWX config ARCH_WANT_RELAX_ORDER bool +config REFCOUNT_FULL + bool "Perform full reference count validation at the expense of speed" + help + Enabling this switches the refcounting infrastructure from a fast + unchecked atomic_t implementation to a fully state checked + implementation, which can be (slightly) slower but provides protections + against various use-after-free conditions that can be used in + security flaw exploits. + source "kernel/gcov/Kconfig" diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index 6e1242da0159..4104a0839214 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -86,8 +86,6 @@ struct task_struct; #define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4) #define TSK_K_FP(tsk) TSK_K_REG(tsk, 0) -#define thread_saved_pc(tsk) TSK_K_BLINK(tsk) - extern void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long usp); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 4c1a35f15838..bf3285b7532a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1416,6 +1416,7 @@ choice config VMSPLIT_3G bool "3G/1G user/kernel split" config VMSPLIT_3G_OPT + depends on !ARM_LPAE bool "3G/1G user/kernel split (for full 1G low memory)" config VMSPLIT_2G bool "2G/2G user/kernel split" @@ -2061,6 +2062,23 @@ config EFI is only useful for kernels that may run on systems that have UEFI firmware. +config DMI + bool "Enable support for SMBIOS (DMI) tables" + depends on EFI + default y + help + This enables SMBIOS/DMI feature for systems. + + This option is only useful on systems that have UEFI firmware. + However, even with this option, the resultant kernel should + continue to boot on existing non-UEFI platforms. + + NOTE: This does *NOT* enable or encourage the use of DMI quirks, + i.e., the the practice of identifying the platform via DMI to + decide whether certain workarounds for buggy hardware and/or + firmware need to be enabled. This would require the DMI subsystem + to be enabled much earlier than we do on ARM, which is non-trivial. + endmenu menu "CPU Power Management" diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S index 3f7d1b74c5e0..a17ca8d78656 100644 --- a/arch/arm/boot/compressed/efi-header.S +++ b/arch/arm/boot/compressed/efi-header.S @@ -17,7 +17,8 @@ @ there. .inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000 #else - W(mov) r0, r0 + AR_CLASS( mov r0, r0 ) + M_CLASS( nop.w ) #endif .endm diff --git a/arch/arm/include/asm/dmi.h b/arch/arm/include/asm/dmi.h new file mode 100644 index 000000000000..df2d2ff06f5b --- /dev/null +++ b/arch/arm/include/asm/dmi.h @@ -0,0 +1,19 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_DMI_H +#define __ASM_DMI_H + +#include <linux/io.h> +#include <linux/slab.h> + +#define dmi_early_remap(x, l) memremap(x, l, MEMREMAP_WB) +#define dmi_early_unmap(x, l) memunmap(x) +#define dmi_remap(x, l) memremap(x, l, MEMREMAP_WB) +#define dmi_unmap(x) memunmap(x) +#define dmi_alloc(l) kzalloc(l, GFP_KERNEL) + +#endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 32e1a9513dc7..4e80bf7420d4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -315,7 +315,7 @@ static void __init cacheid_init(void) if (arch >= CPU_ARCH_ARMv6) { unsigned int cachetype = read_cpuid_cachetype(); - if ((arch == CPU_ARCH_ARMv7M) && !cachetype) { + if ((arch == CPU_ARCH_ARMv7M) && !(cachetype & 0xf000f)) { cacheid = 0; } else if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 572a8df1b766..c9a0a5299827 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -555,8 +555,7 @@ static DEFINE_RAW_SPINLOCK(stop_lock); */ static void ipi_cpu_stop(unsigned int cpu) { - if (system_state == SYSTEM_BOOTING || - system_state == SYSTEM_RUNNING) { + if (system_state <= SYSTEM_RUNNING) { raw_spin_lock(&stop_lock); pr_crit("CPU%u: stopping\n", cpu); dump_stack(); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 31af3cb59a60..e46a6a446cdd 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1218,15 +1218,15 @@ void __init adjust_lowmem_bounds(void) high_memory = __va(arm_lowmem_limit - 1) + 1; + if (!memblock_limit) + memblock_limit = arm_lowmem_limit; + /* * Round the memblock limit down to a pmd size. This * helps to ensure that we will allocate memory from the * last full pmd, which should be mapped. */ - if (memblock_limit) - memblock_limit = round_down(memblock_limit, PMD_SIZE); - if (!memblock_limit) - memblock_limit = arm_lowmem_limit; + memblock_limit = round_down(memblock_limit, PMD_SIZE); if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { if (memblock_end_of_DRAM() > arm_lowmem_limit) { diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 5d17f377d905..82cd07592519 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,7 +11,6 @@ * */ -#include <linux/dmi.h> #include <linux/efi.h> #include <linux/init.h> @@ -117,20 +116,6 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm, set_permissions, md); } -static int __init arm64_dmi_init(void) -{ - /* - * On arm64, DMI depends on UEFI, and dmi_scan_machine() needs to - * be called early because dmi_id_init(), which is an arch_initcall - * itself, depends on dmi_scan_machine() having been called already. - */ - dmi_scan_machine(); - if (dmi_available) - dmi_set_dump_stack_arch_desc(); - return 0; -} -core_initcall(arm64_dmi_init); - /* * UpdateCapsule() depends on the system being shutdown via * ResetSystem(). diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6e0e16a3a7d4..321119881abf 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -961,8 +961,7 @@ void smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - if (system_state == SYSTEM_BOOTING || - system_state == SYSTEM_RUNNING) + if (system_state <= SYSTEM_RUNNING) pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_STOP); } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41b6e31f8f55..d0cb007fa482 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk) /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; vdso_data->raw_time_sec = tk->raw_time.tv_sec; - vdso_data->raw_time_nsec = tk->raw_time.tv_nsec; + vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec << + tk->tkr_raw.shift) + + tk->tkr_raw.xtime_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - /* tkr_raw.xtime_nsec == 0 */ vdso_data->cs_mono_mult = tk->tkr_mono.mult; vdso_data->cs_raw_mult = tk->tkr_raw.mult; /* tkr_mono.shift == tkr_raw.shift */ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index e00b4671bd7c..76320e920965 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -256,7 +256,6 @@ monotonic_raw: seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. */ - lsl x14, x14, x12 get_nsec_per_sec res=x9 lsl x9, x9, x12 diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h index 85d4af97c986..dbdbb8a558df 100644 --- a/arch/blackfin/include/asm/processor.h +++ b/arch/blackfin/include/asm/processor.h @@ -75,11 +75,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* - * Return saved PC of a blocked thread. - */ -#define thread_saved_pc(tsk) (tsk->thread.pc) - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) \ diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index b9eb3da7f278..7c87b5be53b5 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -96,11 +96,6 @@ static inline void release_thread(struct task_struct *dead_task) #define release_segments(mm) do { } while (0) /* - * saved PC of a blocked thread. - */ -#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc) - -/* * saved kernel SP and DP of a blocked thread. */ #ifdef _BIG_ENDIAN diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index e299d30105b5..a2cdb1521aca 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -69,14 +69,6 @@ void hard_reset_now (void) while(1) /* waiting for RETRIBUTION! */ ; } -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *t) -{ - return task_pt_regs(t)->irp; -} - /* setup the child's kernel stack with a pt_regs and switch_stack on it. * it will be un-nested during _resume and _ret_from_sys_call when the * new thread is scheduled. diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index c530a8fa87ce..fe87b383fbf3 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -85,14 +85,6 @@ hard_reset_now(void) } /* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *t) -{ - return task_pt_regs(t)->erp; -} - -/* * Setup the child's kernel stack with a pt_regs and call switch_stack() on it. * It will be unnested during _resume and _ret_from_sys_call when the new thread * is scheduled. diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h index 15b815df29c1..bc2729e4b2c9 100644 --- a/arch/cris/include/asm/processor.h +++ b/arch/cris/include/asm/processor.h @@ -52,8 +52,6 @@ unsigned long get_wchan(struct task_struct *p); #define KSTK_ESP(tsk) ((tsk) == current ? rdusp() : (tsk)->thread.usp) -extern unsigned long thread_saved_pc(struct task_struct *tsk); - /* Free all resources held by a thread. */ static inline void release_thread(struct task_struct *dead_task) { diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index ddaeb9cc9143..e4d08d74ed9f 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -96,11 +96,6 @@ extern asmlinkage void *restore_user_regs(const struct user_context *target, ... #define release_segments(mm) do { } while (0) #define forget_segments() do { } while (0) -/* - * Return saved PC of a blocked thread. - */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) ((tsk)->thread.frame0->pc) diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 5a4c92abc99e..a957b374e3a6 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -198,15 +198,6 @@ unsigned long get_wchan(struct task_struct *p) return 0; } -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - /* Check whether the thread is blocked in resume() */ - if (in_sched_functions(tsk->thread.pc)) - return ((unsigned long *)tsk->thread.fp)[2]; - else - return tsk->thread.pc; -} - int elf_check_arch(const struct elf32_hdr *hdr) { unsigned long hsr0 = __get_HSR(0); diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h index 65132d7ae9e5..afa53147e66a 100644 --- a/arch/h8300/include/asm/processor.h +++ b/arch/h8300/include/asm/processor.h @@ -110,10 +110,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk); unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) \ diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 0f5db5bb561b..d1ddcabbbe83 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -129,11 +129,6 @@ int copy_thread(unsigned long clone_flags, return 0; } -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((struct pt_regs *)tsk->thread.esp0)->pc; -} - unsigned long get_wchan(struct task_struct *p) { unsigned long fp, pc; diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h index 45a825402f63..ce67940860a5 100644 --- a/arch/hexagon/include/asm/processor.h +++ b/arch/hexagon/include/asm/processor.h @@ -33,9 +33,6 @@ /* task_struct, defined elsewhere, is the "process descriptor" */ struct task_struct; -/* this is defined in arch/process.c */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - extern void start_thread(struct pt_regs *, unsigned long, unsigned long); /* diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index de715bab7956..656050c2e6a0 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -61,14 +61,6 @@ void arch_cpu_idle(void) } /* - * Return saved PC of a blocked thread - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return 0; -} - -/* * Copy architecture-specific thread state */ int copy_thread(unsigned long clone_flags, unsigned long usp, diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 26a63d69c599..ab982f07ea68 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -602,23 +602,6 @@ ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat) } /* - * Return saved PC of a blocked thread. - * Note that the only way T can block is through a call to schedule() -> switch_to(). - */ -static inline unsigned long -thread_saved_pc (struct task_struct *t) -{ - struct unw_frame_info info; - unsigned long ip; - - unw_init_from_blocked_task(&info, t); - if (unw_unwind(&info) < 0) - return 0; - unw_get_ip(&info, &ip); - return ip; -} - -/* * Get the current instruction/program counter value. */ #define current_text_addr() \ diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 5767367550c6..657874eeeccc 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -122,8 +122,6 @@ extern void release_thread(struct task_struct *); extern void copy_segments(struct task_struct *p, struct mm_struct * mm); extern void release_segments(struct mm_struct * mm); -extern unsigned long thread_saved_pc(struct task_struct *); - /* Copy and release all segment info associated with a VM */ #define copy_segments(p, mm) do { } while (0) #define release_segments(mm) do { } while (0) diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index d8ffcfec599c..8cd7e03f4370 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -39,14 +39,6 @@ #include <linux/err.h> -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return tsk->thread.lr; -} - void (*pm_power_off)(void) = NULL; EXPORT_SYMBOL(pm_power_off); diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h index 77239e81379b..94c36030440c 100644 --- a/arch/m68k/include/asm/processor.h +++ b/arch/m68k/include/asm/processor.h @@ -130,8 +130,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -extern unsigned long thread_saved_pc(struct task_struct *tsk); - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) \ diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index e475c945c8b2..7df92f8b0781 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -40,20 +40,6 @@ asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); - -/* - * Return saved PC from a blocked thread - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp; - /* Check whether the thread is blocked in resume() */ - if (in_sched_functions(sw->retpc)) - return ((unsigned long *)sw->a6)[1]; - else - return sw->retpc; -} - void arch_cpu_idle(void) { #if defined(MACH_ATARI_ONLY) diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index 232a12bf3f99..2dbbb7c66043 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -567,8 +567,7 @@ static void stop_this_cpu(void *data) { unsigned int cpu = smp_processor_id(); - if (system_state == SYSTEM_BOOTING || - system_state == SYSTEM_RUNNING) { + if (system_state <= SYSTEM_RUNNING) { spin_lock(&stop_lock); pr_crit("CPU%u: stopping\n", cpu); dump_stack(); diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index 37ef196e4519..330d556860ba 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -69,8 +69,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -extern unsigned long thread_saved_pc(struct task_struct *t); - extern unsigned long get_wchan(struct task_struct *p); # define KSTK_EIP(tsk) (0) @@ -121,10 +119,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* Return saved (kernel) PC of a blocked thread. */ -# define thread_saved_pc(tsk) \ - ((tsk)->thread.regs ? (tsk)->thread.regs->r15 : 0) - unsigned long get_wchan(struct task_struct *p); /* The size allocated for kernel stacks. This _must_ be a power of two! */ diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index e92a817e645f..6527ec22f158 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -119,23 +119,6 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } -#ifndef CONFIG_MMU -/* - * Return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct cpu_context *ctx = - &(((struct thread_info *)(tsk->stack))->cpu_context); - - /* Check whether the thread is blocked in resume() */ - if (in_sched_functions(ctx->r15)) - return (unsigned long)ctx->r15; - else - return ctx->r14; -} -#endif - unsigned long get_wchan(struct task_struct *p) { /* TBD (used by procfs) */ diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 8d83fc2a96b7..38a302919e6b 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -11,6 +11,7 @@ #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/compiler.h> +#include <asm/irqflags.h> #include <asm/regdef.h> #include <asm/mipsregs.h> #include <asm/stackframe.h> @@ -119,6 +120,7 @@ work_pending: andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS beqz t0, work_notifysig work_resched: + TRACE_IRQS_OFF jal schedule local_irq_disable # make sure need_resched and @@ -155,6 +157,7 @@ syscall_exit_work: beqz t0, work_pending # trace bit set? local_irq_enable # could let syscall_trace_leave() # call schedule() instead + TRACE_IRQS_ON move a0, sp jal syscall_trace_leave b resume_userspace diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index cf052204eb0a..d1bb506adc10 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -106,8 +106,8 @@ NESTED(kernel_entry, 16, sp) # kernel entry point beq t0, t1, dtb_found #endif li t1, -2 - beq a0, t1, dtb_found move t2, a1 + beq a0, t1, dtb_found li t2, 0 dtb_found: diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index 5f928c34c148..d99416094ba9 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -56,7 +56,6 @@ DECLARE_BITMAP(state_support, CPS_PM_STATE_COUNT); * state. Actually per-core rather than per-CPU. */ static DEFINE_PER_CPU_ALIGNED(u32*, ready_count); -static DEFINE_PER_CPU_ALIGNED(void*, ready_count_alloc); /* Indicates online CPUs coupled with the current CPU */ static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled); @@ -642,7 +641,6 @@ static int cps_pm_online_cpu(unsigned int cpu) { enum cps_pm_state state; unsigned core = cpu_data[cpu].core; - unsigned dlinesz = cpu_data[cpu].dcache.linesz; void *entry_fn, *core_rc; for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) { @@ -662,16 +660,11 @@ static int cps_pm_online_cpu(unsigned int cpu) } if (!per_cpu(ready_count, core)) { - core_rc = kmalloc(dlinesz * 2, GFP_KERNEL); + core_rc = kmalloc(sizeof(u32), GFP_KERNEL); if (!core_rc) { pr_err("Failed allocate core %u ready_count\n", core); return -ENOMEM; } - per_cpu(ready_count_alloc, core) = core_rc; - - /* Ensure ready_count is aligned to a cacheline boundary */ - core_rc += dlinesz - 1; - core_rc = (void *)((unsigned long)core_rc & ~(dlinesz - 1)); per_cpu(ready_count, core) = core_rc; } diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 9681b5877140..38dfa27730ff 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -201,6 +201,8 @@ void show_stack(struct task_struct *task, unsigned long *sp) { struct pt_regs regs; mm_segment_t old_fs = get_fs(); + + regs.cp0_status = KSU_KERNEL; if (sp) { regs.regs[29] = (unsigned long)sp; regs.regs[31] = 0; diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index 7c6336dd2638..7cd92166a0b9 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi) int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, bool user, bool kernel) { - int idx_user, idx_kernel; + /* + * Initialize idx_user and idx_kernel to workaround bogus + * maybe-initialized warning when using GCC 6. + */ + int idx_user = 0, idx_kernel = 0; unsigned long flags, old_entryhi; local_irq_save(flags); diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index 4a2d03c72959..caa62f20a888 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -54,7 +54,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, return ieee754dp_nanxcpt(z); case IEEE754_CLASS_DNORM: DPDNORMZ; - /* QNAN is handled separately below */ + /* QNAN and ZERO cases are handled separately below */ } switch (CLPAIR(xc, yc)) { @@ -210,6 +210,9 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, } assert(rm & (DP_HIDDEN_BIT << 3)); + if (zc == IEEE754_CLASS_ZERO) + return ieee754dp_format(rs, re, rm); + /* And now the addition */ assert(zm & DP_HIDDEN_BIT); diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index a8cd8b4f235e..c91d5e5d9b5f 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -54,7 +54,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, return ieee754sp_nanxcpt(z); case IEEE754_CLASS_DNORM: SPDNORMZ; - /* QNAN is handled separately below */ + /* QNAN and ZERO cases are handled separately below */ } switch (CLPAIR(xc, yc)) { @@ -203,6 +203,9 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, } assert(rm & (SP_HIDDEN_BIT << 3)); + if (zc == IEEE754_CLASS_ZERO) + return ieee754sp_format(rs, re, rm); + /* And now the addition */ assert(zm & SP_HIDDEN_BIT); diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index fe8df14b6169..e08598c70b3e 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -68,12 +68,25 @@ static inline struct page *dma_addr_to_page(struct device *dev, * systems and only the R10000 and R12000 are used in such systems, the * SGI IP28 Indigo² rsp. SGI IP32 aka O2. */ -static inline int cpu_needs_post_dma_flush(struct device *dev) +static inline bool cpu_needs_post_dma_flush(struct device *dev) { - return !plat_device_is_coherent(dev) && - (boot_cpu_type() == CPU_R10000 || - boot_cpu_type() == CPU_R12000 || - boot_cpu_type() == CPU_BMIPS5000); + if (plat_device_is_coherent(dev)) + return false; + + switch (boot_cpu_type()) { + case CPU_R10000: + case CPU_R12000: + case CPU_BMIPS5000: + return true; + + default: + /* + * Presence of MAARs suggests that the CPU supports + * speculatively prefetching data, and therefore requires + * the post-DMA flush/invalidate. + */ + return cpu_has_maar; + } } static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index 18e17abf7664..3ae479117b42 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -132,11 +132,6 @@ static inline void start_thread(struct pt_regs *regs, /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -/* - * Return saved PC of a blocked thread. - */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(task) ((task)->thread.uregs) diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index c9fa42619c6a..89e8027e07fb 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -40,14 +40,6 @@ #include "internal.h" /* - * return saved PC of a blocked thread. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return ((unsigned long *) tsk->thread.sp)[3]; -} - -/* * power off function, if any */ void (*pm_power_off)(void); diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h index 3bbbc3d798e5..4944e2e1d8b0 100644 --- a/arch/nios2/include/asm/processor.h +++ b/arch/nios2/include/asm/processor.h @@ -75,9 +75,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -/* Return saved PC of a blocked thread. */ -#define thread_saved_pc(tsk) ((tsk)->thread.kregs->ea) - extern unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(p) \ diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h index a908e6c30a00..396d8f306c21 100644 --- a/arch/openrisc/include/asm/processor.h +++ b/arch/openrisc/include/asm/processor.h @@ -84,11 +84,6 @@ void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp); void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); -/* - * Return saved PC of a blocked thread. For now, this is the "user" PC - */ -extern unsigned long thread_saved_pc(struct task_struct *t); - #define init_stack (init_thread_union.stack) #define cpu_relax() barrier() diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 106859ae27ff..f9b77003f113 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -110,11 +110,6 @@ void show_regs(struct pt_regs *regs) show_registers(regs); } -unsigned long thread_saved_pc(struct task_struct *t) -{ - return (unsigned long)user_regs(t->stack)->pc; -} - void release_thread(struct task_struct *dead_task) { } diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index a3661ee6b060..4c6694b4e77e 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -163,12 +163,7 @@ struct thread_struct { .flags = 0 \ } -/* - * Return saved PC of a blocked thread. This is used by ps mostly. - */ - struct task_struct; -unsigned long thread_saved_pc(struct task_struct *t); void show_trace(struct task_struct *task, unsigned long *stack); /* diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 4516a5b53f38..b64d7d21646e 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -239,11 +239,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp, return 0; } -unsigned long thread_saved_pc(struct task_struct *t) -{ - return t->thread.regs.kpc; -} - unsigned long get_wchan(struct task_struct *p) { diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index a83821f33ea3..8814a7249ceb 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self, extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_handler(struct pt_regs *regs); extern int kprobe_post_handler(struct pt_regs *regs); +extern int is_current_kprobe_addr(unsigned long addr); #ifdef CONFIG_KPROBES_ON_FTRACE extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb); diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index bb99b651085a..1189d04f3bd1 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -378,12 +378,6 @@ struct thread_struct { } #endif -/* - * Return saved PC of a blocked thread. For now, this is the "user" PC - */ -#define thread_saved_pc(tsk) \ - ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0) - #define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs) unsigned long get_wchan(struct task_struct *p); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 5c0d8a8cdae5..41e88d3ce36b 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -267,13 +267,7 @@ do { \ extern unsigned long __copy_tofrom_user(void __user *to, const void __user *from, unsigned long size); -#ifndef __powerpc64__ - -#define INLINE_COPY_FROM_USER -#define INLINE_COPY_TO_USER - -#else /* __powerpc64__ */ - +#ifdef __powerpc64__ static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ae418b85c17c..b886795060fd 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1411,10 +1411,8 @@ USE_TEXT_SECTION() .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_STD_MMU_64 - andis. r0,r4,0xa410 /* weird error? */ + andis. r0,r4,0xa450 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ - andis. r0,r4,DSISR_DABRMATCH@h - bne- handle_dabr_fault CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ @@ -1438,11 +1436,16 @@ do_hash_page: /* Error */ blt- 13f + + /* Reload DSISR into r4 for the DABR check below */ + ld r4,_DSISR(r1) #endif /* CONFIG_PPC_STD_MMU_64 */ /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: -11: ld r4,_DAR(r1) +11: andis. r0,r4,DSISR_DABRMATCH@h + bne- handle_dabr_fault + ld r4,_DAR(r1) ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_page_fault diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index fc4343514bed..01addfb0ed0a 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; +int is_current_kprobe_addr(unsigned long addr) +{ + struct kprobe *p = kprobe_running(); + return (p && (unsigned long)p->addr == addr) ? 1 : 0; +} + bool arch_within_kprobe_blacklist(unsigned long addr) { return (addr >= (unsigned long)__kprobes_text_start && @@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif + /* + * jprobes use jprobe_return() which skips the normal return + * path of the function, and this messes up the accounting of the + * function graph tracer. + * + * Pause function graph tracing while performing the jprobe function. + */ + pause_graph_tracing(); + return 1; } NOKPROBE_SYMBOL(setjmp_pre_handler); @@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) * saved regs... */ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + /* It's OK to start function graph tracing again */ + unpause_graph_tracing(); preempt_enable_no_resched(); return 1; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index a8c1f99e9607..4640f6d64f8b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void) #endif /* + * Emergency stacks are used for a range of things, from asynchronous + * NMIs (system reset, machine check) to synchronous, process context. + * We set preempt_count to zero, even though that isn't necessarily correct. To + * get the right value we'd need to copy it from the previous thread_info, but + * doing that might fault causing more problems. + * TODO: what to do with accounting? + */ +static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu) +{ + ti->task = NULL; + ti->cpu = cpu; + ti->preempt_count = 0; + ti->local_flags = 0; + ti->flags = 0; + klp_init_thread_info(ti); +} + +/* * Stack space used when we detect a bad kernel stack pointer, and * early in SMP boots before relocation is enabled. Exclusive emergency * stack for machine checks. @@ -633,24 +651,31 @@ void __init emergency_stack_init(void) * Since we use these as temporary stacks during secondary CPU * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. + * + * The IRQ stacks allocated elsewhere in this file are zeroed and + * initialized in kernel/irq.c. These are initialized here in order + * to have emergency stacks available as early as possible. */ limit = min(safe_stack_limit(), ppc64_rma_size); for_each_possible_cpu(i) { struct thread_info *ti; ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].emergency_sp = (void *)ti + THREAD_SIZE; #ifdef CONFIG_PPC_BOOK3S_64 /* emergency stack for NMI exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE; /* emergency stack for machine check exception handling. */ ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit)); - klp_init_thread_info(ti); + memset(ti, 0, THREAD_SIZE); + emerg_stack_init_thread_info(ti, i); paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE; #endif } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index df2a41647d8e..1069f74fca47 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -97,7 +97,7 @@ int smp_generic_cpu_bootable(unsigned int nr) /* Special case - we inhibit secondary thread startup * during boot if the user requests it. */ - if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) { + if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) return 0; if (smt_enabled_at_boot diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 7c933a99f5d5..c98e90b4ea7b 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller) stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ - SAVE_8GPRS(0,r1) - SAVE_8GPRS(8,r1) - SAVE_8GPRS(16,r1) - SAVE_8GPRS(24,r1) + SAVE_GPR(0, r1) + SAVE_10GPRS(2, r1) + SAVE_10GPRS(12, r1) + SAVE_10GPRS(22, r1) + + /* Save previous stack pointer (r1) */ + addi r8, r1, SWITCH_FRAME_SIZE + std r8, GPR1(r1) /* Load special regs for save below */ mfmsr r8 @@ -95,18 +99,44 @@ ftrace_call: bl ftrace_stub nop - /* Load ctr with the possibly modified NIP */ - ld r3, _NIP(r1) - mtctr r3 + /* Load the possibly modified NIP */ + ld r15, _NIP(r1) + #ifdef CONFIG_LIVEPATCH - cmpd r14,r3 /* has NIP been altered? */ + cmpd r14, r15 /* has NIP been altered? */ +#endif + +#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE) + /* NIP has not been altered, skip over further checks */ + beq 1f + + /* Check if there is an active kprobe on us */ + subi r3, r14, 4 + bl is_current_kprobe_addr + nop + + /* + * If r3 == 1, then this is a kprobe/jprobe. + * else, this is livepatched function. + * + * The conditional branch for livepatch_handler below will use the + * result of this comparison. For kprobe/jprobe, we just need to branch to + * the new NIP, not call livepatch_handler. The branch below is bne, so we + * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want + * CR0[EQ] = (r3 == 1). + */ + cmpdi r3, 1 +1: #endif + /* Load CTR with the possibly modified NIP */ + mtctr r15 + /* Restore gprs */ - REST_8GPRS(0,r1) - REST_8GPRS(8,r1) - REST_8GPRS(16,r1) - REST_8GPRS(24,r1) + REST_GPR(0,r1) + REST_10GPRS(2,r1) + REST_10GPRS(12,r1) + REST_10GPRS(22,r1) /* Restore possibly modified LR */ ld r0, _LINK(r1) @@ -119,7 +149,10 @@ ftrace_call: addi r1, r1, SWITCH_FRAME_SIZE #ifdef CONFIG_LIVEPATCH - /* Based on the cmpd above, if the NIP was altered handle livepatch */ + /* + * Based on the cmpd or cmpdi above, if the NIP was altered and we're + * not on a kprobe/jprobe, then handle livepatch. + */ bne- livepatch_handler #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 42b7a4fd57d9..8d1a365b8edc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); break; case KVM_REG_PPC_TB_OFFSET: + /* + * POWER9 DD1 has an erratum where writing TBU40 causes + * the timebase to lose ticks. So we don't let the + * timebase offset be changed on P9 DD1. (It is + * initialized to zero.) + */ + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + break; /* round up to multiple of 2^24 */ vcpu->arch.vcore->tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24); @@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; + unsigned long ebb_regs[3] = {}; /* shut up GCC */ + unsigned long user_tar = 0; + unsigned int user_vrsave; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; } + /* + * Don't allow entry with a suspended transaction, because + * the guest entry/exit code will lose it. + * If the guest has TM enabled, save away their TM-related SPRs + * (they will get restored by the TM unavailable interrupt). + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs && + (current->thread.regs->msr & MSR_TM)) { + if (MSR_TM_ACTIVE(current->thread.regs->msr)) { + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + run->fail_entry.hardware_entry_failure_reason = 0; + return -EINVAL; + } + current->thread.tm_tfhar = mfspr(SPRN_TFHAR); + current->thread.tm_tfiar = mfspr(SPRN_TFIAR); + current->thread.tm_texasr = mfspr(SPRN_TEXASR); + current->thread.regs->msr &= ~MSR_TM; + } +#endif + kvmppc_core_prepare_to_enter(vcpu); /* No need to go into the guest when all we'll do is come back out */ @@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_all_to_thread(current); + /* Save userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + ebb_regs[0] = mfspr(SPRN_EBBHR); + ebb_regs[1] = mfspr(SPRN_EBBRR); + ebb_regs[2] = mfspr(SPRN_BESCR); + user_tar = mfspr(SPRN_TAR); + } + user_vrsave = mfspr(SPRN_VRSAVE); + vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; @@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } } while (is_kvmppc_resume_guest(r)); + /* Restore userspace EBB and other register values */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_EBBHR, ebb_regs[0]); + mtspr(SPRN_EBBRR, ebb_regs[1]); + mtspr(SPRN_BESCR, ebb_regs[2]); + mtspr(SPRN_TAR, user_tar); + mtspr(SPRN_FSCR, current->thread.fscr); + } + mtspr(SPRN_VRSAVE, user_vrsave); + out: vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&vcpu->kvm->arch.vcpus_running); diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 0fdc4a28970b..404deb512844 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * Put whatever is in the decrementer into the * hypervisor decrementer. */ +BEGIN_FTR_SECTION + ld r5, HSTATE_KVM_VCORE(r13) + ld r6, VCORE_KVM(r5) + ld r9, KVM_HOST_LPCR(r6) + andis. r9, r9, LPCR_LD@h +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mfspr r8,SPRN_DEC mftb r7 - mtspr SPRN_HDEC,r8 +BEGIN_FTR_SECTION + /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */ + bne 32f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) extsw r8,r8 +32: mtspr SPRN_HDEC,r8 add r8,r8,r7 std r8,HSTATE_DECEXP(r13) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bdb3f76ceb6b..4888dd494604 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,12 +32,29 @@ #include <asm/opal.h> #include <asm/xive-regs.h> +/* Sign-extend HDEC if not on POWER9 */ +#define EXTEND_HDEC(reg) \ +BEGIN_FTR_SECTION; \ + extsw reg, reg; \ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 +/* Stack frame offsets for kvmppc_hv_entry */ +#define SFS 144 +#define STACK_SLOT_TRAP (SFS-4) +#define STACK_SLOT_TID (SFS-16) +#define STACK_SLOT_PSSCR (SFS-24) +#define STACK_SLOT_PID (SFS-32) +#define STACK_SLOT_IAMR (SFS-40) +#define STACK_SLOT_CIABR (SFS-48) +#define STACK_SLOT_DAWR (SFS-56) +#define STACK_SLOT_DAWRX (SFS-64) + /* * Call kvmppc_hv_entry in real mode. * Must be called with interrupts hard-disabled. @@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ + /* HDEC may be larger than DEC for arch >= v3.00, but since the */ + /* HDEC value came from DEC in the first place, it will fit */ mfspr r3, SPRN_HDEC mtspr SPRN_DEC, r3 /* @@ -295,8 +314,9 @@ kvm_novcpu_wakeup: /* See if our timeslice has expired (HDEC is negative) */ mfspr r0, SPRN_HDEC + EXTEND_HDEC(r0) li r12, BOOK3S_INTERRUPT_HV_DECREMENTER - cmpwi r0, 0 + cmpdi r0, 0 blt kvm_novcpu_exit /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ @@ -319,10 +339,10 @@ kvm_novcpu_exit: bl kvmhv_accumulate_time #endif 13: mr r3, r12 - stw r12, 112-4(r1) + stw r12, STACK_SLOT_TRAP(r1) bl kvmhv_commence_exit nop - lwz r12, 112-4(r1) + lwz r12, STACK_SLOT_TRAP(r1) b kvmhv_switch_to_host /* @@ -390,8 +410,8 @@ kvm_secondary_got_guest: lbz r4, HSTATE_PTID(r13) cmpwi r4, 0 bne 63f - lis r6, 0x7fff - ori r6, r6, 0xffff + LOAD_REG_ADDR(r6, decrementer_max) + ld r6, 0(r6) mtspr SPRN_HDEC, r6 /* and set per-LPAR registers, if doing dynamic micro-threading */ ld r6, HSTATE_SPLIT_MODE(r13) @@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * * *****************************************************************************/ -/* Stack frame offsets */ -#define STACK_SLOT_TID (112-16) -#define STACK_SLOT_PSSCR (112-24) -#define STACK_SLOT_PID (112-32) - .global kvmppc_hv_entry kvmppc_hv_entry: @@ -565,7 +580,7 @@ kvmppc_hv_entry: */ mflr r0 std r0, PPC_LR_STKOFF(r1) - stdu r1, -112(r1) + stdu r1, -SFS(r1) /* Save R1 in the PACA */ std r1, HSTATE_HOST_R1(r13) @@ -749,10 +764,20 @@ BEGIN_FTR_SECTION mfspr r5, SPRN_TIDR mfspr r6, SPRN_PSSCR mfspr r7, SPRN_PID + mfspr r8, SPRN_IAMR std r5, STACK_SLOT_TID(r1) std r6, STACK_SLOT_PSSCR(r1) std r7, STACK_SLOT_PID(r1) + std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) +BEGIN_FTR_SECTION + mfspr r5, SPRN_CIABR + mfspr r6, SPRN_DAWR + mfspr r7, SPRN_DAWRX + std r5, STACK_SLOT_CIABR(r1) + std r6, STACK_SLOT_DAWR(r1) + std r7, STACK_SLOT_DAWRX(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION /* Set partition DABR */ @@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) /* Check if HDEC expires soon */ mfspr r3, SPRN_HDEC - cmpwi r3, 512 /* 1 microsecond */ + EXTEND_HDEC(r3) + cmpdi r3, 512 /* 1 microsecond */ blt hdec_soon #ifdef CONFIG_KVM_XICS @@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) * set by the guest could disrupt the host. */ li r0, 0 - mtspr SPRN_IAMR, r0 - mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 + mtspr SPRN_PSPB, r0 mtspr SPRN_WORT, r0 BEGIN_FTR_SECTION + mtspr SPRN_IAMR, r0 mtspr SPRN_TCSCR, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ li r0, 1 @@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) std r6,VCPU_UAMOR(r9) li r6,0 mtspr SPRN_AMR,r6 + mtspr SPRN_UAMOR, r6 /* Switch DSCR back to host value */ mfspr r8, SPRN_DSCR @@ -1670,12 +1696,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* Restore host values of some registers */ BEGIN_FTR_SECTION + ld r5, STACK_SLOT_CIABR(r1) + ld r6, STACK_SLOT_DAWR(r1) + ld r7, STACK_SLOT_DAWRX(r1) + mtspr SPRN_CIABR, r5 + mtspr SPRN_DAWR, r6 + mtspr SPRN_DAWRX, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) ld r7, STACK_SLOT_PID(r1) + ld r8, STACK_SLOT_IAMR(r1) mtspr SPRN_TIDR, r5 mtspr SPRN_PSSCR, r6 mtspr SPRN_PID, r7 + mtspr SPRN_IAMR, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION PPC_INVALIDATE_ERAT @@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - ld r0, 112+PPC_LR_STKOFF(r1) - addi r1, r1, 112 + ld r0, SFS+PPC_LR_STKOFF(r1) + addi r1, r1, SFS mtlr r0 blr @@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) mfspr r3, SPRN_DEC mfspr r4, SPRN_HDEC mftb r5 - cmpw r3, r4 + extsw r3, r3 + EXTEND_HDEC(r4) + cmpd r3, r4 ble 67f mtspr SPRN_DEC, r4 67: /* save expiry time of guest decrementer */ - extsw r3, r3 add r3, r3, r5 ld r4, HSTATE_KVM_VCPU(r13) ld r5, HSTATE_KVM_VCORE(r13) diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index cbd82fde5770..09ceea6175ba 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user, struct pt_regs *regs_user_copy) { regs_user->regs = task_pt_regs(current); - regs_user->abi = perf_reg_abi(current); + regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : + PERF_SAMPLE_REGS_ABI_NONE; } diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index e6f444b46207..b5d960d6db3d 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -449,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch, return mmio_atsd_reg; } -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid) +static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) { unsigned long launch; @@ -465,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid) /* PID */ launch |= pid << PPC_BITLSHIFT(38); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + /* Invalidating the entire process doesn't use a va */ return mmio_launch_invalidate(npu, launch, 0); } static int mmio_invalidate_va(struct npu *npu, unsigned long va, - unsigned long pid) + unsigned long pid, bool flush) { unsigned long launch; @@ -486,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va, /* PID */ launch |= pid << PPC_BITLSHIFT(38); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); + return mmio_launch_invalidate(npu, launch, va); } #define mn_to_npu_context(x) container_of(x, struct npu_context, mn) +struct mmio_atsd_reg { + struct npu *npu; + int reg; +}; + +static void mmio_invalidate_wait( + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) +{ + struct npu *npu; + int i, reg; + + /* Wait for all invalidations to complete */ + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; + + /* Wait for completion */ + npu = mmio_atsd_reg[i].npu; + reg = mmio_atsd_reg[i].reg; + while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) + cpu_relax(); + + put_mmio_atsd_reg(npu, reg); + + /* + * The GPU requires two flush ATSDs to ensure all entries have + * been flushed. We use PID 0 as it will never be used for a + * process on the GPU. + */ + if (flush) + mmio_invalidate_pid(npu, 0, true); + } +} + /* * Invalidate either a single address or an entire PID depending on * the value of va. */ static void mmio_invalidate(struct npu_context *npu_context, int va, - unsigned long address) + unsigned long address, bool flush) { - int i, j, reg; + int i, j; struct npu *npu; struct pnv_phb *nphb; struct pci_dev *npdev; - struct { - struct npu *npu; - int reg; - } mmio_atsd_reg[NV_MAX_NPUS]; + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; unsigned long pid = npu_context->mm->context.id; /* @@ -525,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, if (va) mmio_atsd_reg[i].reg = - mmio_invalidate_va(npu, address, pid); + mmio_invalidate_va(npu, address, pid, + flush); else mmio_atsd_reg[i].reg = - mmio_invalidate_pid(npu, pid); + mmio_invalidate_pid(npu, pid, flush); /* * The NPU hardware forwards the shootdown to all GPUs @@ -544,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, */ flush_tlb_mm(npu_context->mm); - /* Wait for all invalidations to complete */ - for (i = 0; i <= max_npu2_index; i++) { - if (mmio_atsd_reg[i].reg < 0) - continue; - - /* Wait for completion */ - npu = mmio_atsd_reg[i].npu; - reg = mmio_atsd_reg[i].reg; - while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) - cpu_relax(); - put_mmio_atsd_reg(npu, reg); - } + mmio_invalidate_wait(mmio_atsd_reg, flush); + if (flush) + /* Wait for the flush to complete */ + mmio_invalidate_wait(mmio_atsd_reg, false); } static void pnv_npu2_mn_release(struct mmu_notifier *mn, @@ -571,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn, * There should be no more translation requests for this PID, but we * need to ensure any entries for it are removed from the TLB. */ - mmio_invalidate(npu_context, 0, 0); + mmio_invalidate(npu_context, 0, 0, true); } static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, @@ -581,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn, { struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, 1, address); + mmio_invalidate(npu_context, 1, address, true); } static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, @@ -590,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn, { struct npu_context *npu_context = mn_to_npu_context(mn); - mmio_invalidate(npu_context, 1, address); + mmio_invalidate(npu_context, 1, address, true); } static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, @@ -600,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn, struct npu_context *npu_context = mn_to_npu_context(mn); unsigned long address; - for (address = start; address <= end; address += PAGE_SIZE) - mmio_invalidate(npu_context, 1, address); + for (address = start; address < end; address += PAGE_SIZE) + mmio_invalidate(npu_context, 1, address, false); + + /* Do the flush only on the final addess == end */ + mmio_invalidate(npu_context, 1, address, true); } static const struct mmu_notifier_ops nv_nmmu_notifier_ops = { @@ -651,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, /* No nvlink associated with this GPU device */ return ERR_PTR(-ENODEV); - if (!mm) { - /* kernel thread contexts are not supported */ + if (!mm || mm->context.id == 0) { + /* + * Kernel thread contexts are not supported and context id 0 is + * reserved on the GPU. + */ return ERR_PTR(-EINVAL); } diff --git a/arch/s390/include/asm/eadm.h b/arch/s390/include/asm/eadm.h index 67026300c88e..144809a3f4f6 100644 --- a/arch/s390/include/asm/eadm.h +++ b/arch/s390/include/asm/eadm.h @@ -3,6 +3,7 @@ #include <linux/types.h> #include <linux/device.h> +#include <linux/blkdev.h> struct arqb { u64 data; @@ -105,13 +106,14 @@ struct scm_driver { int (*probe) (struct scm_device *scmdev); int (*remove) (struct scm_device *scmdev); void (*notify) (struct scm_device *scmdev, enum scm_event event); - void (*handler) (struct scm_device *scmdev, void *data, int error); + void (*handler) (struct scm_device *scmdev, void *data, + blk_status_t error); }; int scm_driver_register(struct scm_driver *scmdrv); void scm_driver_unregister(struct scm_driver *scmdrv); int eadm_start_aob(struct aob *aob); -void scm_irq_handler(struct aob *aob, int error); +void scm_irq_handler(struct aob *aob, blk_status_t error); #endif /* _ASM_S390_EADM_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 60d395fdc864..aeac013968f2 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -221,11 +221,6 @@ extern void release_thread(struct task_struct *); /* Free guarded storage control block for current */ void exit_thread_gs(void); -/* - * Return saved PC of a blocked thread. - */ -extern unsigned long thread_saved_pc(struct task_struct *t); - unsigned long get_wchan(struct task_struct *p); #define task_pt_regs(tsk) ((struct pt_regs *) \ (task_stack_page(tsk) + THREAD_SIZE) - 1) diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index e784bed6ed7f..2b498e58b914 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -109,7 +109,7 @@ struct sysinfo_2_2_2 { unsigned short cpus_shared; char reserved_4[3]; unsigned char vsne; - uuid_be uuid; + uuid_t uuid; char reserved_5[160]; char ext_name[256]; }; @@ -134,7 +134,7 @@ struct sysinfo_3_2_2 { char reserved_1[3]; unsigned char evmne; unsigned int reserved_2; - uuid_be uuid; + uuid_t uuid; } vm[8]; char reserved_3[1504]; char ext_names[8][256]; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index e545ffe5155a..8e622bb52f7a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -564,8 +564,6 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { - if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW) - diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); @@ -1088,10 +1086,7 @@ static void __reipl_run(void *unused) break; case REIPL_METHOD_CCW_DIAG: diag308(DIAG308_SET, reipl_block_ccw); - if (MACHINE_IS_LPAR) - diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); - else - diag308(DIAG308_LOAD_CLEAR, NULL); + diag308(DIAG308_LOAD_CLEAR, NULL); break; case REIPL_METHOD_FCP_RW_DIAG: diag308(DIAG308_SET, reipl_block_fcp); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 999d7154bbdc..bb32b8618bf6 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -41,31 +41,6 @@ asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); -/* - * Return saved PC of a blocked thread. used in kernel/sched. - * resume in entry.S does not create a new stack frame, it - * just stores the registers %r6-%r15 to the frame given by - * schedule. We want to return the address of the caller of - * schedule, so we have to walk the backchain one time to - * find the frame schedule() store its return address. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct stack_frame *sf, *low, *high; - - if (!tsk || !task_stack_page(tsk)) - return 0; - low = task_stack_page(tsk); - high = (struct stack_frame *) task_pt_regs(tsk); - sf = (struct stack_frame *) tsk->thread.ksp; - if (sf <= low || sf > high) - return 0; - sf = (struct stack_frame *) sf->back_chain; - if (sf <= low || sf > high) - return 0; - return sf->gprs[8]; -} - extern void kernel_thread_starter(void); /* diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index eefcb54872a5..fb869b103825 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -242,7 +242,7 @@ static void print_ext_name(struct seq_file *m, int lvl, static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info) { - if (!memcmp(&info->vm[i].uuid, &NULL_UUID_BE, sizeof(uuid_be))) + if (uuid_is_null(&info->vm[i].uuid)) return; seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid); } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9da243d94cc3..3b297fa3aa67 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -977,11 +977,12 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, ptr = asce.origin * 4096; if (asce.r) { *fake = 1; + ptr = 0; asce.dt = ASCE_TYPE_REGION1; } switch (asce.dt) { case ASCE_TYPE_REGION1: - if (vaddr.rfx01 > asce.tl && !asce.r) + if (vaddr.rfx01 > asce.tl && !*fake) return PGM_REGION_FIRST_TRANS; break; case ASCE_TYPE_REGION2: @@ -1009,8 +1010,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, union region1_table_entry rfte; if (*fake) { - /* offset in 16EB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.rsx << 53UL); + ptr += (unsigned long) vaddr.rfx << 53; rfte.val = ptr; goto shadow_r2t; } @@ -1036,8 +1036,7 @@ shadow_r2t: union region2_table_entry rste; if (*fake) { - /* offset in 8PB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.rtx << 42UL); + ptr += (unsigned long) vaddr.rsx << 42; rste.val = ptr; goto shadow_r3t; } @@ -1064,8 +1063,7 @@ shadow_r3t: union region3_table_entry rtte; if (*fake) { - /* offset in 4TB guest memory block */ - ptr = ptr + ((unsigned long) vaddr.sx << 31UL); + ptr += (unsigned long) vaddr.rtx << 31; rtte.val = ptr; goto shadow_sgt; } @@ -1101,8 +1099,7 @@ shadow_sgt: union segment_table_entry ste; if (*fake) { - /* offset in 2G guest memory block */ - ptr = ptr + ((unsigned long) vaddr.sx << 20UL); + ptr += (unsigned long) vaddr.sx << 20; ste.val = ptr; goto shadow_pgt; } diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h index d9a922d8711b..299274581968 100644 --- a/arch/score/include/asm/processor.h +++ b/arch/score/include/asm/processor.h @@ -13,7 +13,6 @@ struct task_struct; */ extern void (*cpu_wait)(void); -extern unsigned long thread_saved_pc(struct task_struct *tsk); extern void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp); extern unsigned long get_wchan(struct task_struct *p); diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index eb64d7a677cb..6e20241a1ed4 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -101,11 +101,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *r) return 1; } -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return task_pt_regs(tsk)->cp0_epc; -} - unsigned long get_wchan(struct task_struct *task) { if (!task || task == current || task->state == TASK_RUNNING) diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h index dd27159819eb..b395e5620c0b 100644 --- a/arch/sparc/include/asm/processor_32.h +++ b/arch/sparc/include/asm/processor_32.h @@ -67,9 +67,6 @@ struct thread_struct { .current_ds = KERNEL_DS, \ } -/* Return saved PC of a blocked thread. */ -unsigned long thread_saved_pc(struct task_struct *t); - /* Do necessary setup to start up a newly executed thread. */ static inline void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index b58ee9018433..f04dc5a43062 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -89,9 +89,7 @@ struct thread_struct { #include <linux/types.h> #include <asm/fpumacro.h> -/* Return saved PC of a blocked thread. */ struct task_struct; -unsigned long thread_saved_pc(struct task_struct *); /* On Uniprocessor, even in RMO processes see TSO semantics */ #ifdef CONFIG_SMP diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index b6dac8e980f0..9245f93398c7 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -177,14 +177,6 @@ void show_stack(struct task_struct *tsk, unsigned long *_ksp) } /* - * Note: sparc64 has a pretty intricated thread_saved_pc, check it out. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - return task_thread_info(tsk)->kpc; -} - -/* * Free current thread data structures etc.. */ void exit_thread(struct task_struct *tsk) diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 1badc493e62e..b96104da5bd6 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -400,25 +400,6 @@ core_initcall(sparc_sysrq_init); #endif -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct thread_info *ti = task_thread_info(tsk); - unsigned long ret = 0xdeadbeefUL; - - if (ti && ti->ksp) { - unsigned long *sp; - sp = (unsigned long *)(ti->ksp + STACK_BIAS); - if (((unsigned long)sp & (sizeof(long) - 1)) == 0UL && - sp[14]) { - unsigned long *fp; - fp = (unsigned long *)(sp[14] + STACK_BIAS); - if (((unsigned long)fp & (sizeof(long) - 1)) == 0UL) - ret = fp[15]; - } - } - return ret; -} - /* Free current thread data structures etc.. */ void exit_thread(struct task_struct *tsk) { diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index 0bc9968b97a1..f71e5206650b 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -214,13 +214,6 @@ static inline void release_thread(struct task_struct *dead_task) extern void prepare_exit_to_usermode(struct pt_regs *regs, u32 flags); - -/* - * Return saved (kernel) PC of a blocked thread. - * Only used in a printk() in kernel/sched/core.c, so don't work too hard. - */ -#define thread_saved_pc(t) ((t)->thread.pc) - unsigned long get_wchan(struct task_struct *p); /* Return initial ksp value for given task. */ diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 1a70e6c0f259..94709ab41ed8 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -24,8 +24,7 @@ * has an opportunity to return -EFAULT to the user if needed. * The 64-bit routines just return a "long long" with the value, * since they are only used from kernel space and don't expect to fault. - * Support for 16-bit ops is included in the framework but we don't provide - * any (x86_64 has an atomic_inc_short(), so we might want to some day). + * Support for 16-bit ops is included in the framework but we don't provide any. * * Note that the caller is advised to issue a suitable L1 or L2 * prefetch on the address being manipulated to avoid extra stalls. diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 85410279beab..b55fe9bf5d3e 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -534,7 +534,7 @@ static void ubd_handler(void) for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { blk_end_request( (*irq_req_buffer)[count]->req, - 0, + BLK_STS_OK, (*irq_req_buffer)[count]->length ); kfree((*irq_req_buffer)[count]); diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index 2d1e0dd5bb0b..f6d1a3f747a9 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -58,8 +58,6 @@ static inline void release_thread(struct task_struct *task) { } -extern unsigned long thread_saved_pc(struct task_struct *t); - static inline void mm_copy_segments(struct mm_struct *from_mm, struct mm_struct *new_mm) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 64a1fd06f3fd..7b5640117325 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -56,12 +56,6 @@ union thread_union cpu0_irqstack __attribute__((__section__(".data..init_irqstack"))) = { INIT_THREAD_INFO(init_task) }; -unsigned long thread_saved_pc(struct task_struct *task) -{ - /* FIXME: Need to look up userspace_pid by cpu */ - return os_process_pc(userspace_pid[0]); -} - /* Changed in setup_arch, which is called in early boot */ static char host_info[(__NEW_UTS_LEN + 1) * 5]; diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index 73ccf63b0f48..9dc1ce6ba3c0 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -13,7 +13,7 @@ static inline char rdfs8(addr_t addr) return *((char *)(fs + addr)); } #include "../cmdline.c" -static unsigned long get_cmd_line_ptr(void) +unsigned long get_cmd_line_ptr(void) { unsigned long cmd_line_ptr = boot_params->hdr.cmd_line_ptr; diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 54c24f0a43d3..91f27ab970ef 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -9,16 +9,42 @@ * contain the entire properly aligned running kernel image. * */ + +/* + * isspace() in linux/ctype.h is expected by next_args() to filter + * out "space/lf/tab". While boot/ctype.h conflicts with linux/ctype.h, + * since isdigit() is implemented in both of them. Hence disable it + * here. + */ +#define BOOT_CTYPE_H + +/* + * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h. + * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL + * which is meaningless and will cause compiling error in some cases. + * So do not include linux/export.h and define EXPORT_SYMBOL(sym) + * as empty. + */ +#define _LINUX_EXPORT_H +#define EXPORT_SYMBOL(sym) + #include "misc.h" #include "error.h" -#include "../boot.h" +#include "../string.h" #include <generated/compile.h> #include <linux/module.h> #include <linux/uts.h> #include <linux/utsname.h> +#include <linux/ctype.h> #include <generated/utsrelease.h> +/* Macros used by the included decompressor code below. */ +#define STATIC +#include <linux/decompress/mm.h> + +extern unsigned long get_cmd_line_ptr(void); + /* Simplified build-specific string for starting entropy. */ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; @@ -62,6 +88,11 @@ struct mem_vector { static bool memmap_too_large; + +/* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */ +unsigned long long mem_limit = ULLONG_MAX; + + enum mem_avoid_index { MEM_AVOID_ZO_RANGE = 0, MEM_AVOID_INITRD, @@ -85,49 +116,14 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) return true; } -/** - * _memparse - Parse a string with mem suffixes into a number - * @ptr: Where parse begins - * @retptr: (output) Optional pointer to next char after parse completes - * - * Parses a string into a number. The number stored at @ptr is - * potentially suffixed with K, M, G, T, P, E. - */ -static unsigned long long _memparse(const char *ptr, char **retptr) +char *skip_spaces(const char *str) { - char *endptr; /* Local pointer to end of parsed string */ - - unsigned long long ret = simple_strtoull(ptr, &endptr, 0); - - switch (*endptr) { - case 'E': - case 'e': - ret <<= 10; - case 'P': - case 'p': - ret <<= 10; - case 'T': - case 't': - ret <<= 10; - case 'G': - case 'g': - ret <<= 10; - case 'M': - case 'm': - ret <<= 10; - case 'K': - case 'k': - ret <<= 10; - endptr++; - default: - break; - } - - if (retptr) - *retptr = endptr; - - return ret; + while (isspace(*str)) + ++str; + return (char *)str; } +#include "../../../../lib/ctype.c" +#include "../../../../lib/cmdline.c" static int parse_memmap(char *p, unsigned long long *start, unsigned long long *size) @@ -142,40 +138,41 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size) return -EINVAL; oldp = p; - *size = _memparse(p, &p); + *size = memparse(p, &p); if (p == oldp) return -EINVAL; switch (*p) { - case '@': - /* Skip this region, usable */ - *start = 0; - *size = 0; - return 0; case '#': case '$': case '!': - *start = _memparse(p + 1, &p); + *start = memparse(p + 1, &p); + return 0; + case '@': + /* memmap=nn@ss specifies usable region, should be skipped */ + *size = 0; + /* Fall through */ + default: + /* + * If w/o offset, only size specified, memmap=nn[KMG] has the + * same behaviour as mem=nn[KMG]. It limits the max address + * system can use. Region above the limit should be avoided. + */ + *start = 0; return 0; } return -EINVAL; } -static void mem_avoid_memmap(void) +static void mem_avoid_memmap(char *str) { - char arg[128]; + static int i; int rc; - int i; - char *str; - /* See if we have any memmap areas */ - rc = cmdline_find_option("memmap", arg, sizeof(arg)); - if (rc <= 0) + if (i >= MAX_MEMMAP_REGIONS) return; - i = 0; - str = arg; while (str && (i < MAX_MEMMAP_REGIONS)) { int rc; unsigned long long start, size; @@ -188,9 +185,14 @@ static void mem_avoid_memmap(void) if (rc < 0) break; str = k; - /* A usable region that should not be skipped */ - if (size == 0) + + if (start == 0) { + /* Store the specified memory limit if size > 0 */ + if (size > 0) + mem_limit = size; + continue; + } mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start; mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size; @@ -202,6 +204,57 @@ static void mem_avoid_memmap(void) memmap_too_large = true; } +static int handle_mem_memmap(void) +{ + char *args = (char *)get_cmd_line_ptr(); + size_t len = strlen((char *)args); + char *tmp_cmdline; + char *param, *val; + u64 mem_size; + + if (!strstr(args, "memmap=") && !strstr(args, "mem=")) + return 0; + + tmp_cmdline = malloc(len + 1); + if (!tmp_cmdline ) + error("Failed to allocate space for tmp_cmdline"); + + memcpy(tmp_cmdline, args, len); + tmp_cmdline[len] = 0; + args = tmp_cmdline; + + /* Chew leading spaces */ + args = skip_spaces(args); + + while (*args) { + args = next_arg(args, ¶m, &val); + /* Stop at -- */ + if (!val && strcmp(param, "--") == 0) { + warn("Only '--' specified in cmdline"); + free(tmp_cmdline); + return -1; + } + + if (!strcmp(param, "memmap")) { + mem_avoid_memmap(val); + } else if (!strcmp(param, "mem")) { + char *p = val; + + if (!strcmp(p, "nopentium")) + continue; + mem_size = memparse(p, &p); + if (mem_size == 0) { + free(tmp_cmdline); + return -EINVAL; + } + mem_limit = mem_size; + } + } + + free(tmp_cmdline); + return 0; +} + /* * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). * The mem_avoid array is used to store the ranges that need to be avoided @@ -323,7 +376,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, /* We don't need to set a mapping for setup_data. */ /* Mark the memmap regions we need to avoid */ - mem_avoid_memmap(); + handle_mem_memmap(); #ifdef CONFIG_X86_VERBOSE_BOOTUP /* Make sure video RAM can be used. */ @@ -432,7 +485,8 @@ static void process_e820_entry(struct boot_e820_entry *entry, { struct mem_vector region, overlap; struct slot_area slot_area; - unsigned long start_orig; + unsigned long start_orig, end; + struct boot_e820_entry cur_entry; /* Skip non-RAM entries. */ if (entry->type != E820_TYPE_RAM) @@ -446,8 +500,15 @@ static void process_e820_entry(struct boot_e820_entry *entry, if (entry->addr + entry->size < minimum) return; - region.start = entry->addr; - region.size = entry->size; + /* Ignore entries above memory limit */ + end = min(entry->size + entry->addr, mem_limit); + if (entry->addr >= end) + return; + cur_entry.addr = entry->addr; + cur_entry.size = end - entry->addr; + + region.start = cur_entry.addr; + region.size = cur_entry.size; /* Give up if slot area array is full. */ while (slot_area_index < MAX_SLOT_AREA) { @@ -461,7 +522,7 @@ static void process_e820_entry(struct boot_e820_entry *entry, region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); /* Did we raise the address above this e820 region? */ - if (region.start > entry->addr + entry->size) + if (region.start > cur_entry.addr + cur_entry.size) return; /* Reduce size by any delta from the original address. */ @@ -564,9 +625,6 @@ void choose_random_location(unsigned long input, { unsigned long random_addr, min_addr; - /* By default, keep output position unchanged. */ - *virt_addr = *output; - if (cmdline_find_option_bool("nokaslr")) { warn("KASLR disabled: 'nokaslr' on cmdline."); return; diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index b3c5a5f030ce..00241c815524 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -338,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned long output_len) { const unsigned long kernel_total_size = VO__end - VO__text; - unsigned long virt_addr = (unsigned long)output; + unsigned long virt_addr = LOAD_PHYSICAL_ADDR; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -390,6 +390,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); + if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE) + error("Destination virtual address is beyond the kernel mapping area"); #else if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) error("Destination address too large"); @@ -397,7 +399,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #ifndef CONFIG_RELOCATABLE if ((unsigned long)output != LOAD_PHYSICAL_ADDR) error("Destination address does not match LOAD_PHYSICAL_ADDR"); - if ((unsigned long)output != virt_addr) + if (virt_addr != LOAD_PHYSICAL_ADDR) error("Destination virtual address changed when not relocatable"); #endif diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 1c8355eadbd1..766a5211f827 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -81,8 +81,6 @@ static inline void choose_random_location(unsigned long input, unsigned long output_size, unsigned long *virt_addr) { - /* No change from existing output location. */ - *virt_addr = *output; } #endif diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index 1eb7d298b47d..15d9f74b0008 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -65,23 +65,3 @@ GLOBAL(copy_to_fs) popw %es retl ENDPROC(copy_to_fs) - -#if 0 /* Not currently used, but can be enabled as needed */ -GLOBAL(copy_from_gs) - pushw %ds - pushw %gs - popw %ds - calll memcpy - popw %ds - retl -ENDPROC(copy_from_gs) - -GLOBAL(copy_to_gs) - pushw %es - pushw %gs - popw %es - calll memcpy - popw %es - retl -ENDPROC(copy_to_gs) -#endif diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 5457b02fc050..630e3664906b 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -122,6 +122,14 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas return result; } +long simple_strtol(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + return -simple_strtoull(cp + 1, endp, base); + + return simple_strtoull(cp, endp, base); +} + /** * strlen - Find the length of a string * @s: The string to be sized diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 113588ddb43f..f274a50db5fa 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -22,6 +22,7 @@ extern int strcmp(const char *str1, const char *str2); extern int strncmp(const char *cs, const char *ct, size_t count); extern size_t strlen(const char *s); extern char *strstr(const char *s1, const char *s2); +extern char *strchr(const char *s, int c); extern size_t strnlen(const char *s, size_t maxlen); extern unsigned int atou(const char *s); extern unsigned long long simple_strtoull(const char *cp, char **endp, diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 34b3fa2889d1..9e32d40d71bd 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -2,6 +2,8 @@ # Arch-specific CryptoAPI modules. # +OBJECT_FILES_NON_STANDARD := y + avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ $(comma)4)$(comma)%ymm2,yes,no) diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile index 2f8756375df5..2e14acc3da25 100644 --- a/arch/x86/crypto/sha1-mb/Makefile +++ b/arch/x86/crypto/sha1-mb/Makefile @@ -2,6 +2,8 @@ # Arch-specific CryptoAPI modules. # +OBJECT_FILES_NON_STANDARD := y + avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ $(comma)4)$(comma)%ymm2,yes,no) ifeq ($(avx2_supported),yes) diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile index 41089e7c400c..45b4fca6c4a8 100644 --- a/arch/x86/crypto/sha256-mb/Makefile +++ b/arch/x86/crypto/sha256-mb/Makefile @@ -2,6 +2,8 @@ # Arch-specific CryptoAPI modules. # +OBJECT_FILES_NON_STANDARD := y + avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ $(comma)4)$(comma)%ymm2,yes,no) ifeq ($(avx2_supported),yes) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 580b60f5ac83..628b8c556aab 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1750,6 +1750,8 @@ ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event) return ret; } +static struct attribute_group x86_pmu_attr_group; + static int __init init_hw_perf_events(void) { struct x86_pmu_quirk *quirk; @@ -1813,6 +1815,14 @@ static int __init init_hw_perf_events(void) x86_pmu_events_group.attrs = tmp; } + if (x86_pmu.attrs) { + struct attribute **tmp; + + tmp = merge_attr(x86_pmu_attr_group.attrs, x86_pmu.attrs); + if (!WARN_ON(!tmp)) + x86_pmu_attr_group.attrs = tmp; + } + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... generic registers: %d\n", x86_pmu.num_counters); @@ -2255,7 +2265,7 @@ static struct pmu pmu = { void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { - struct cyc2ns_data *data; + struct cyc2ns_data data; u64 offset; userpg->cap_user_time = 0; @@ -2267,17 +2277,17 @@ void arch_perf_update_userpage(struct perf_event *event, if (!using_native_sched_clock() || !sched_clock_stable()) return; - data = cyc2ns_read_begin(); + cyc2ns_read_begin(&data); - offset = data->cyc2ns_offset + __sched_clock_offset; + offset = data.cyc2ns_offset + __sched_clock_offset; /* * Internal timekeeping for enabled/running/stopped times * is always in the local_clock domain. */ userpg->cap_user_time = 1; - userpg->time_mult = data->cyc2ns_mul; - userpg->time_shift = data->cyc2ns_shift; + userpg->time_mult = data.cyc2ns_mul; + userpg->time_shift = data.cyc2ns_shift; userpg->time_offset = offset - now; /* @@ -2289,7 +2299,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->time_zero = offset; } - cyc2ns_read_end(data); + cyc2ns_read_end(); } void diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a6d91d4e37a1..31acf2a98394 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -431,11 +431,11 @@ static __initconst const u64 skl_hw_cache_event_ids [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_INST_RETIRED.ALL_LOADS */ - [ C(RESULT_MISS) ] = 0x608, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ + [ C(RESULT_MISS) ] = 0xe08, /* DTLB_LOAD_MISSES.WALK_COMPLETED */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_INST_RETIRED.ALL_STORES */ - [ C(RESULT_MISS) ] = 0x649, /* DTLB_STORE_MISSES.WALK_COMPLETED */ + [ C(RESULT_MISS) ] = 0xe49, /* DTLB_STORE_MISSES.WALK_COMPLETED */ }, [ C(OP_PREFETCH) ] = { [ C(RESULT_ACCESS) ] = 0x0, @@ -3160,6 +3160,19 @@ err: return -ENOMEM; } +static void flip_smm_bit(void *data) +{ + unsigned long set = *(unsigned long *)data; + + if (set > 0) { + msr_set_bit(MSR_IA32_DEBUGCTLMSR, + DEBUGCTLMSR_FREEZE_IN_SMM_BIT); + } else { + msr_clear_bit(MSR_IA32_DEBUGCTLMSR, + DEBUGCTLMSR_FREEZE_IN_SMM_BIT); + } +} + static void intel_pmu_cpu_starting(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); @@ -3174,6 +3187,8 @@ static void intel_pmu_cpu_starting(int cpu) cpuc->lbr_sel = NULL; + flip_smm_bit(&x86_pmu.attr_freeze_on_smi); + if (!cpuc->shared_regs) return; @@ -3595,6 +3610,52 @@ static struct attribute *hsw_events_attrs[] = { NULL }; +static ssize_t freeze_on_smi_show(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi); +} + +static DEFINE_MUTEX(freeze_on_smi_mutex); + +static ssize_t freeze_on_smi_store(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val; + ssize_t ret; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + + if (val > 1) + return -EINVAL; + + mutex_lock(&freeze_on_smi_mutex); + + if (x86_pmu.attr_freeze_on_smi == val) + goto done; + + x86_pmu.attr_freeze_on_smi = val; + + get_online_cpus(); + on_each_cpu(flip_smm_bit, &val, 1); + put_online_cpus(); +done: + mutex_unlock(&freeze_on_smi_mutex); + + return count; +} + +static DEVICE_ATTR_RW(freeze_on_smi); + +static struct attribute *intel_pmu_attrs[] = { + &dev_attr_freeze_on_smi.attr, + NULL, +}; + __init int intel_pmu_init(void) { union cpuid10_edx edx; @@ -3641,6 +3702,8 @@ __init int intel_pmu_init(void) x86_pmu.max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); + + x86_pmu.attrs = intel_pmu_attrs; /* * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events, when not running in a hypervisor: diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index f924629836a8..eb261656a320 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -18,7 +18,7 @@ enum { LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, }; -static enum { +static const enum { LBR_EIP_FLAGS = 1, LBR_TSX = 2, } lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = { @@ -287,7 +287,7 @@ inline u64 lbr_from_signext_quirk_wr(u64 val) /* * If quirk is needed, ensure sign extension is 61 bits: */ -u64 lbr_from_signext_quirk_rd(u64 val) +static u64 lbr_from_signext_quirk_rd(u64 val) { if (static_branch_unlikely(&lbr_from_quirk_key)) { /* diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 758c1aa5009d..44ec523287f6 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1170,7 +1170,7 @@ static int uncore_event_cpu_online(unsigned int cpu) pmu = type->pmus; for (i = 0; i < type->num_boxes; i++, pmu++) { box = pmu->boxes[pkg]; - if (!box && atomic_inc_return(&box->refcnt) == 1) + if (box && atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index be3d36254040..53728eea1bed 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -562,6 +562,9 @@ struct x86_pmu { ssize_t (*events_sysfs_show)(char *page, u64 config); struct attribute **cpu_events; + unsigned long attr_freeze_on_smi; + struct attribute **attrs; + /* * CPU Hotplug hooks */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index caa5798c92f4..33380b871463 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -246,19 +246,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) return c; } -/** - * atomic_inc_short - increment of a short integer - * @v: pointer to type int - * - * Atomically adds 1 to @v - * Returns the new value of @u - */ -static __always_inline short int atomic_inc_short(short int *v) -{ - asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v)); - return *v; -} - #ifdef CONFIG_X86_32 # include <asm/atomic64_32.h> #else diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 055962615779..722d0e568863 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -296,6 +296,7 @@ struct x86_emulate_ctxt { bool perm_ok; /* do not check permissions if true */ bool ud; /* inject an #UD if host doesn't support insn */ + bool tf; /* TF value before instruction (after for syscall/sysret) */ bool have_exception; struct x86_exception exception; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index fba100713924..d5acc27ed1cc 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -2,8 +2,7 @@ #define _ASM_X86_MSHYPER_H #include <linux/types.h> -#include <linux/interrupt.h> -#include <linux/clocksource.h> +#include <linux/atomic.h> #include <asm/hyperv.h> /* diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 673f9ac50f6d..18b162322eff 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -137,6 +137,8 @@ #define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 +#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) #define MSR_PEBS_FRONTEND 0x000003f7 diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 55fa56fe4e45..a3dcf8944cb9 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -118,7 +118,7 @@ static inline u64 paravirt_read_msr(unsigned msr) static inline void paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { - return PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); + PVOP_VCALL3(pv_cpu_ops.write_msr, msr, low, high); } static inline u64 paravirt_read_msr_safe(unsigned msr, int *err) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3cada998a402..a28b671f1549 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -860,8 +860,6 @@ extern unsigned long KSTK_ESP(struct task_struct *task); #endif /* CONFIG_X86_64 */ -extern unsigned long thread_saved_pc(struct task_struct *tsk); - extern void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp); diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 27e9f9d769b8..2016962103df 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -29,11 +29,9 @@ struct cyc2ns_data { u32 cyc2ns_mul; u32 cyc2ns_shift; u64 cyc2ns_offset; - u32 __count; - /* u32 hole */ -}; /* 24 bytes -- do not grow */ +}; /* 16 bytes */ -extern struct cyc2ns_data *cyc2ns_read_begin(void); -extern void cyc2ns_read_end(struct cyc2ns_data *); +extern void cyc2ns_read_begin(struct cyc2ns_data *); +extern void cyc2ns_read_end(void); #endif /* _ASM_X86_TIMER_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4b994232cb57..3c7c419c4e3e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -29,6 +29,7 @@ OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y OBJECT_FILES_NON_STANDARD_test_nx.o := y +OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y # If instrumentation of this dir is enabled, boot hangs during first second. # Probably could be more selective here, but note that files related to irqs, diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 26b78d86f25a..85a9e17e0dbc 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,3 +1,5 @@ +OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y + obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o obj-$(CONFIG_ACPI_APEI) += apei.o diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c index ae50d3454d78..81ff48905623 100644 --- a/arch/x86/kernel/apic/htirq.c +++ b/arch/x86/kernel/apic/htirq.c @@ -150,7 +150,7 @@ static const struct irq_domain_ops htirq_domain_ops = { .deactivate = htirq_domain_deactivate, }; -void arch_init_htirq_domain(struct irq_domain *parent) +void __init arch_init_htirq_domain(struct irq_domain *parent) { if (disable_apic) return; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 347bb9f65737..247880fc29f9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1200,28 +1200,6 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); static struct irq_chip ioapic_chip, ioapic_ir_chip; -#ifdef CONFIG_X86_32 -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for_each_ioapic_pin(apic, pin) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0))) - return irq_trigger(idx); - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} -#else -static inline int IO_APIC_irq_trigger(int irq) -{ - return 1; -} -#endif - static void __init setup_IO_APIC_irqs(void) { unsigned int ioapic, pin; diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index c61aec7e65f4..4c5d1882bec5 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -136,7 +136,7 @@ static struct msi_domain_info pci_msi_domain_info = { .handler_name = "edge", }; -void arch_init_msi_domain(struct irq_domain *parent) +void __init arch_init_msi_domain(struct irq_domain *parent) { if (disable_apic) return; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f3557a1eb562..e66d8e48e456 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -405,7 +405,7 @@ int __init arch_probe_nr_irqs(void) } #ifdef CONFIG_X86_IO_APIC -static void init_legacy_irqs(void) +static void __init init_legacy_irqs(void) { int i, node = cpu_to_node(0); struct apic_chip_data *data; @@ -424,7 +424,7 @@ static void init_legacy_irqs(void) } } #else -static void init_legacy_irqs(void) { } +static inline void init_legacy_irqs(void) { } #endif int __init arch_early_irq_init(void) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index f5af0cc7eb0d..9257bd9dc664 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -856,11 +856,13 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, dentry = kernfs_mount(fs_type, flags, rdt_root, RDTGROUP_SUPER_MAGIC, NULL); if (IS_ERR(dentry)) - goto out_cdp; + goto out_destroy; static_branch_enable(&rdt_enable_key); goto out; +out_destroy: + kernfs_remove(kn_info); out_cdp: cdp_disable(); out: diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 901c640d152f..69ea0bc1cfa3 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -28,6 +28,7 @@ #include <linux/kdebug.h> #include <linux/kallsyms.h> #include <linux/ftrace.h> +#include <linux/frame.h> #include <asm/text-patching.h> #include <asm/cacheflush.h> @@ -94,6 +95,7 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) } asm ( + "optprobe_template_func:\n" ".global optprobe_template_entry\n" "optprobe_template_entry:\n" #ifdef CONFIG_X86_64 @@ -131,7 +133,12 @@ asm ( " popf\n" #endif ".global optprobe_template_end\n" - "optprobe_template_end:\n"); + "optprobe_template_end:\n" + ".type optprobe_template_func, @function\n" + ".size optprobe_template_func, .-optprobe_template_func\n"); + +void optprobe_template_func(void); +STACK_FRAME_NON_STANDARD(optprobe_template_func); #define TMPL_MOVE_IDX \ ((long)&optprobe_template_val - (long)&optprobe_template_entry) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0bb88428cbf2..3ca198080ea9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -545,17 +545,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } /* - * Return saved PC of a blocked thread. - * What is this good for? it will be always the scheduler or ret_from_fork. - */ -unsigned long thread_saved_pc(struct task_struct *tsk) -{ - struct inactive_task_frame *frame = - (struct inactive_task_frame *) READ_ONCE(tsk->thread.sp); - return READ_ONCE_NOCHECK(frame->ret_addr); -} - -/* * Called from fs/proc with a reference on @p to find the function * which called into schedule(). This needs to be done carefully * because the task might wake up and we might look at a stack diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2544700a2a87..67393fc88353 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -9,6 +9,7 @@ #include <linux/sched.h> #include <linux/tboot.h> #include <linux/delay.h> +#include <linux/frame.h> #include <acpi/reboot.h> #include <asm/io.h> #include <asm/apic.h> @@ -123,6 +124,7 @@ void __noreturn machine_real_restart(unsigned int type) #ifdef CONFIG_APM_MODULE EXPORT_SYMBOL(machine_real_restart); #endif +STACK_FRAME_NON_STANDARD(machine_real_restart); /* * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f81823695014..65622f07e633 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -503,7 +503,7 @@ static int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_find_in_range(low_size, 1ULL << 32, low_size, CRASH_ALIGN); + low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f04479a8f74f..045e4f993bd2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -863,7 +863,7 @@ static void announce_cpu(int cpu, int apicid) if (cpu == 1) printk(KERN_INFO "x86: Booting SMP configuration:\n"); - if (system_state == SYSTEM_BOOTING) { + if (system_state < SYSTEM_RUNNING) { if (node != current_node) { if (current_node > (-1)) pr_cont("\n"); diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 4b1724059909..a4eb27918ceb 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,7 +514,7 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; - if (!intel_iommu_tboot_noforce) + if (intel_iommu_tboot_noforce) return 1; if (no_iommu || swiotlb || dmar_disabled) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 714dfba6a1e7..5270fc0c2df6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -51,115 +51,34 @@ static u32 art_to_tsc_denominator; static u64 art_to_tsc_offset; struct clocksource *art_related_clocksource; -/* - * Use a ring-buffer like data structure, where a writer advances the head by - * writing a new data entry and a reader advances the tail when it observes a - * new entry. - * - * Writers are made to wait on readers until there's space to write a new - * entry. - * - * This means that we can always use an {offset, mul} pair to compute a ns - * value that is 'roughly' in the right direction, even if we're writing a new - * {offset, mul} pair during the clock read. - * - * The down-side is that we can no longer guarantee strict monotonicity anymore - * (assuming the TSC was that to begin with), because while we compute the - * intersection point of the two clock slopes and make sure the time is - * continuous at the point of switching; we can no longer guarantee a reader is - * strictly before or after the switch point. - * - * It does mean a reader no longer needs to disable IRQs in order to avoid - * CPU-Freq updates messing with his times, and similarly an NMI reader will - * no longer run the risk of hitting half-written state. - */ - struct cyc2ns { - struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */ - struct cyc2ns_data *head; /* 48 + 8 = 56 */ - struct cyc2ns_data *tail; /* 56 + 8 = 64 */ -}; /* exactly fits one cacheline */ - -static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); - -struct cyc2ns_data *cyc2ns_read_begin(void) -{ - struct cyc2ns_data *head; - - preempt_disable(); - - head = this_cpu_read(cyc2ns.head); - /* - * Ensure we observe the entry when we observe the pointer to it. - * matches the wmb from cyc2ns_write_end(). - */ - smp_read_barrier_depends(); - head->__count++; - barrier(); + struct cyc2ns_data data[2]; /* 0 + 2*16 = 32 */ + seqcount_t seq; /* 32 + 4 = 36 */ - return head; -} +}; /* fits one cacheline */ -void cyc2ns_read_end(struct cyc2ns_data *head) -{ - barrier(); - /* - * If we're the outer most nested read; update the tail pointer - * when we're done. This notifies possible pending writers - * that we've observed the head pointer and that the other - * entry is now free. - */ - if (!--head->__count) { - /* - * x86-TSO does not reorder writes with older reads; - * therefore once this write becomes visible to another - * cpu, we must be finished reading the cyc2ns_data. - * - * matches with cyc2ns_write_begin(). - */ - this_cpu_write(cyc2ns.tail, head); - } - preempt_enable(); -} +static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -/* - * Begin writing a new @data entry for @cpu. - * - * Assumes some sort of write side lock; currently 'provided' by the assumption - * that cpufreq will call its notifiers sequentially. - */ -static struct cyc2ns_data *cyc2ns_write_begin(int cpu) +void cyc2ns_read_begin(struct cyc2ns_data *data) { - struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); - struct cyc2ns_data *data = c2n->data; + int seq, idx; - if (data == c2n->head) - data++; + preempt_disable_notrace(); - /* XXX send an IPI to @cpu in order to guarantee a read? */ + do { + seq = this_cpu_read(cyc2ns.seq.sequence); + idx = seq & 1; - /* - * When we observe the tail write from cyc2ns_read_end(), - * the cpu must be done with that entry and its safe - * to start writing to it. - */ - while (c2n->tail == data) - cpu_relax(); + data->cyc2ns_offset = this_cpu_read(cyc2ns.data[idx].cyc2ns_offset); + data->cyc2ns_mul = this_cpu_read(cyc2ns.data[idx].cyc2ns_mul); + data->cyc2ns_shift = this_cpu_read(cyc2ns.data[idx].cyc2ns_shift); - return data; + } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) +void cyc2ns_read_end(void) { - struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); - - /* - * Ensure the @data writes are visible before we publish the - * entry. Matches the data-depencency in cyc2ns_read_begin(). - */ - smp_wmb(); - - ACCESS_ONCE(c2n->head) = data; + preempt_enable_notrace(); } /* @@ -191,7 +110,6 @@ static void cyc2ns_data_init(struct cyc2ns_data *data) data->cyc2ns_mul = 0; data->cyc2ns_shift = 0; data->cyc2ns_offset = 0; - data->__count = 0; } static void cyc2ns_init(int cpu) @@ -201,51 +119,29 @@ static void cyc2ns_init(int cpu) cyc2ns_data_init(&c2n->data[0]); cyc2ns_data_init(&c2n->data[1]); - c2n->head = c2n->data; - c2n->tail = c2n->data; + seqcount_init(&c2n->seq); } static inline unsigned long long cycles_2_ns(unsigned long long cyc) { - struct cyc2ns_data *data, *tail; + struct cyc2ns_data data; unsigned long long ns; - /* - * See cyc2ns_read_*() for details; replicated in order to avoid - * an extra few instructions that came with the abstraction. - * Notable, it allows us to only do the __count and tail update - * dance when its actually needed. - */ - - preempt_disable_notrace(); - data = this_cpu_read(cyc2ns.head); - tail = this_cpu_read(cyc2ns.tail); - - if (likely(data == tail)) { - ns = data->cyc2ns_offset; - ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); - } else { - data->__count++; - - barrier(); - - ns = data->cyc2ns_offset; - ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); + cyc2ns_read_begin(&data); - barrier(); + ns = data.cyc2ns_offset; + ns += mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift); - if (!--data->__count) - this_cpu_write(cyc2ns.tail, data); - } - preempt_enable_notrace(); + cyc2ns_read_end(); return ns; } -static void set_cyc2ns_scale(unsigned long khz, int cpu) +static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now) { - unsigned long long tsc_now, ns_now; - struct cyc2ns_data *data; + unsigned long long ns_now; + struct cyc2ns_data data; + struct cyc2ns *c2n; unsigned long flags; local_irq_save(flags); @@ -254,9 +150,6 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu) if (!khz) goto done; - data = cyc2ns_write_begin(cpu); - - tsc_now = rdtsc(); ns_now = cycles_2_ns(tsc_now); /* @@ -264,7 +157,7 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu) * time function is continuous; see the comment near struct * cyc2ns_data. */ - clocks_calc_mult_shift(&data->cyc2ns_mul, &data->cyc2ns_shift, khz, + clocks_calc_mult_shift(&data.cyc2ns_mul, &data.cyc2ns_shift, khz, NSEC_PER_MSEC, 0); /* @@ -273,20 +166,26 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu) * conversion algorithm shifting a 32-bit value (now specifies a 64-bit * value) - refer perf_event_mmap_page documentation in perf_event.h. */ - if (data->cyc2ns_shift == 32) { - data->cyc2ns_shift = 31; - data->cyc2ns_mul >>= 1; + if (data.cyc2ns_shift == 32) { + data.cyc2ns_shift = 31; + data.cyc2ns_mul >>= 1; } - data->cyc2ns_offset = ns_now - - mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, data->cyc2ns_shift); + data.cyc2ns_offset = ns_now - + mul_u64_u32_shr(tsc_now, data.cyc2ns_mul, data.cyc2ns_shift); + + c2n = per_cpu_ptr(&cyc2ns, cpu); - cyc2ns_write_end(cpu, data); + raw_write_seqcount_latch(&c2n->seq); + c2n->data[0] = data; + raw_write_seqcount_latch(&c2n->seq); + c2n->data[1] = data; done: - sched_clock_idle_wakeup_event(0); + sched_clock_idle_wakeup_event(); local_irq_restore(flags); } + /* * Scheduler clock - returns current time in nanosec units. */ @@ -374,6 +273,8 @@ static int __init tsc_setup(char *str) tsc_clocksource_reliable = 1; if (!strncmp(str, "noirqtime", 9)) no_sched_irq_time = 1; + if (!strcmp(str, "unstable")) + mark_tsc_unstable("boot parameter"); return 1; } @@ -986,7 +887,6 @@ void tsc_restore_sched_clock_state(void) } #ifdef CONFIG_CPU_FREQ - /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency * changes. * @@ -1027,7 +927,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!(freq->flags & CPUFREQ_CONST_LOOPS)) mark_tsc_unstable("cpufreq changes"); - set_cyc2ns_scale(tsc_khz, freq->cpu); + set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc()); } return 0; @@ -1127,6 +1027,15 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) pr_info("Marking TSC unstable due to clocksource watchdog\n"); } +static void tsc_cs_tick_stable(struct clocksource *cs) +{ + if (tsc_unstable) + return; + + if (using_native_sched_clock()) + sched_clock_tick_stable(); +} + /* * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() */ @@ -1140,6 +1049,7 @@ static struct clocksource clocksource_tsc = { .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, .mark_unstable = tsc_cs_mark_unstable, + .tick_stable = tsc_cs_tick_stable, }; void mark_tsc_unstable(char *reason) @@ -1255,6 +1165,7 @@ static void tsc_refine_calibration_work(struct work_struct *work) static int hpet; u64 tsc_stop, ref_stop, delta; unsigned long freq; + int cpu; /* Don't bother refining TSC on unstable systems */ if (check_tsc_unstable()) @@ -1305,6 +1216,10 @@ static void tsc_refine_calibration_work(struct work_struct *work) /* Inform the TSC deadline clockevent devices about the recalibration */ lapic_update_tsc_freq(); + /* Update the sched_clock() rate to match the clocksource one */ + for_each_possible_cpu(cpu) + set_cyc2ns_scale(tsc_khz, cpu, tsc_stop); + out: if (boot_cpu_has(X86_FEATURE_ART)) art_related_clocksource = &clocksource_tsc; @@ -1350,7 +1265,7 @@ device_initcall(init_tsc_clocksource); void __init tsc_init(void) { - u64 lpj; + u64 lpj, cyc; int cpu; if (!boot_cpu_has(X86_FEATURE_TSC)) { @@ -1390,9 +1305,10 @@ void __init tsc_init(void) * speed as the bootup CPU. (cpufreq notifiers will fix this * up if their speed diverges) */ + cyc = rdtsc(); for_each_possible_cpu(cpu) { cyc2ns_init(cpu); - set_cyc2ns_scale(tsc_khz, cpu); + set_cyc2ns_scale(tsc_khz, cpu, cyc); } if (tsc_disabled > 0) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0816ab2e8adc..80890dee66ce 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2742,6 +2742,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); } + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ba9891ac5c56..33460fcdeef9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -36,6 +36,7 @@ #include <linux/slab.h> #include <linux/amd-iommu.h> #include <linux/hashtable.h> +#include <linux/frame.h> #include <asm/apic.h> #include <asm/perf_event.h> @@ -4906,6 +4907,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) mark_all_clean(svm->vmcb); } +STACK_FRAME_NON_STANDARD(svm_vcpu_run); static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ca5d2b93385c..1b469b6c762f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/tboot.h> #include <linux/hrtimer.h> +#include <linux/frame.h> #include "kvm_cache_regs.h" #include "x86.h" @@ -8652,6 +8653,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) ); } } +STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); static bool vmx_has_high_real_mode_segbase(void) { @@ -9028,6 +9030,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); } +STACK_FRAME_NON_STANDARD(vmx_vcpu_run); static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 87d3cb901935..0e846f0cb83b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5313,6 +5313,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ctxt->eflags = kvm_get_rflags(vcpu); + ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0; + ctxt->eip = kvm_rip_read(vcpu); ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : (ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 : @@ -5528,36 +5530,25 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, return dr6; } -static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r) +static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) { struct kvm_run *kvm_run = vcpu->run; - /* - * rflags is the old, "raw" value of the flags. The new value has - * not been saved yet. - * - * This is correct even for TF set by the guest, because "the - * processor will not generate this exception after the instruction - * that sets the TF flag". - */ - if (unlikely(rflags & X86_EFLAGS_TF)) { - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | - DR6_RTM; - kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; - kvm_run->debug.arch.exception = DB_VECTOR; - kvm_run->exit_reason = KVM_EXIT_DEBUG; - *r = EMULATE_USER_EXIT; - } else { - /* - * "Certain debug exceptions may clear bit 0-3. The - * remaining contents of the DR6 register are never - * cleared by the processor". - */ - vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= DR6_BS | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); - } + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM; + kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip; + kvm_run->debug.arch.exception = DB_VECTOR; + kvm_run->exit_reason = KVM_EXIT_DEBUG; + *r = EMULATE_USER_EXIT; + } else { + /* + * "Certain debug exceptions may clear bit 0-3. The + * remaining contents of the DR6 register are never + * cleared by the processor". + */ + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= DR6_BS | DR6_RTM; + kvm_queue_exception(vcpu, DB_VECTOR); } } @@ -5567,7 +5558,17 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu) int r = EMULATE_DONE; kvm_x86_ops->skip_emulated_instruction(vcpu); - kvm_vcpu_check_singlestep(vcpu, rflags, &r); + + /* + * rflags is the old, "raw" value of the flags. The new value has + * not been saved yet. + * + * This is correct even for TF set by the guest, because "the + * processor will not generate this exception after the instruction + * that sets the TF flag". + */ + if (unlikely(rflags & X86_EFLAGS_TF)) + kvm_vcpu_do_singlestep(vcpu, &r); return r == EMULATE_DONE; } EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction); @@ -5726,8 +5727,9 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); - if (r == EMULATE_DONE) - kvm_vcpu_check_singlestep(vcpu, rflags, &r); + if (r == EMULATE_DONE && + (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP))) + kvm_vcpu_do_singlestep(vcpu, &r); if (!ctxt->have_exception || exception_type(ctxt->exception.vector) == EXCPT_TRAP) __kvm_set_rflags(vcpu, ctxt->eflags); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index c5959576c315..020f75cc8cf6 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -37,7 +37,7 @@ ENTRY(copy_user_generic_unrolled) movl %edx,%ecx andl $63,%edx shrl $6,%ecx - jz 17f + jz .L_copy_short_string 1: movq (%rsi),%r8 2: movq 1*8(%rsi),%r9 3: movq 2*8(%rsi),%r10 @@ -58,7 +58,8 @@ ENTRY(copy_user_generic_unrolled) leaq 64(%rdi),%rdi decl %ecx jnz 1b -17: movl %edx,%ecx +.L_copy_short_string: + movl %edx,%ecx andl $7,%edx shrl $3,%ecx jz 20f @@ -174,6 +175,8 @@ EXPORT_SYMBOL(copy_user_generic_string) */ ENTRY(copy_user_enhanced_fast_string) ASM_STAC + cmpl $64,%edx + jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */ movl %edx,%ecx 1: rep movsb diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S index c81556409bbb..10ffa7e8519f 100644 --- a/arch/x86/lib/msr-reg.S +++ b/arch/x86/lib/msr-reg.S @@ -13,14 +13,14 @@ .macro op_safe_regs op ENTRY(\op\()_safe_regs) pushq %rbx - pushq %rbp + pushq %r12 movq %rdi, %r10 /* Save pointer */ xorl %r11d, %r11d /* Return value */ movl (%rdi), %eax movl 4(%rdi), %ecx movl 8(%rdi), %edx movl 12(%rdi), %ebx - movl 20(%rdi), %ebp + movl 20(%rdi), %r12d movl 24(%rdi), %esi movl 28(%rdi), %edi 1: \op @@ -29,10 +29,10 @@ ENTRY(\op\()_safe_regs) movl %ecx, 4(%r10) movl %edx, 8(%r10) movl %ebx, 12(%r10) - movl %ebp, 20(%r10) + movl %r12d, 20(%r10) movl %esi, 24(%r10) movl %edi, 28(%r10) - popq %rbp + popq %r12 popq %rbx ret 3: diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 767be7c76034..12e377184ee4 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -1009,7 +1009,7 @@ GrpTable: Grp15 1: fxstor | RDGSBASE Ry (F3),(11B) 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) -4: XSAVE +4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) 6: XSAVEOPT | clwb (66) | mfence (11B) 7: clflush | clflushopt (66) | sfence (11B) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 95651dc58e09..0a59daf799f8 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -990,7 +990,13 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, pud_base = pud_offset(p4d, 0); remove_pud_table(pud_base, addr, next, direct); - free_pud_table(pud_base, p4d); + /* + * For 4-level page tables we do not want to free PUDs, but in the + * 5-level case we should free them. This code will have to change + * to adapt for boot-time switching between 4 and 5 level page tables. + */ + if (CONFIG_PGTABLE_LEVELS == 5) + free_pud_table(pud_base, p4d); } if (direct) diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile index 90568c33ddb0..fefb4b619598 100644 --- a/arch/x86/net/Makefile +++ b/arch/x86/net/Makefile @@ -1,4 +1,6 @@ # # Arch-specific network modules # +OBJECT_FILES_NON_STANDARD_bpf_jit.o += y + obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index f1d83b34c329..2f56e1ed61c3 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,4 +1,5 @@ OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_efi_stub_$(BITS).o := y obj-$(CONFIG_EFI) += quirks.o efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 43b96f5f78ba..f084d8718ac4 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1014,7 +1014,6 @@ static void __init __efi_enter_virtual_mode(void) * necessary relocation fixups for the new virtual addresses. */ efi_runtime_update_mappings(); - efi_dump_pagetable(); /* clean DUMMY object */ efi_delete_dummy_variable(); @@ -1029,6 +1028,8 @@ void __init efi_enter_virtual_mode(void) kexec_enter_virtual_mode(); else __efi_enter_virtual_mode(); + + efi_dump_pagetable(); } /* diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 3481268da3d0..52f7faa1538f 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -44,7 +44,14 @@ int __init efi_alloc_page_tables(void) } void efi_sync_low_kernel_mappings(void) {} -void __init efi_dump_pagetable(void) {} + +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP + ptdump_walk_pgd_level(NULL, swapper_pg_dir); +#endif +} + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { return 0; diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index eb8dff15a7f6..8ff1f95627f9 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -589,7 +589,10 @@ void __init efi_runtime_update_mappings(void) void __init efi_dump_pagetable(void) { #ifdef CONFIG_EFI_PGT_DUMP - ptdump_walk_pgd_level(NULL, efi_pgd); + if (efi_enabled(EFI_OLD_MEMMAP)) + ptdump_walk_pgd_level(NULL, swapper_pg_dir); + else + ptdump_walk_pgd_level(NULL, efi_pgd); #endif } diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index e0cf95a83f3f..8a99a2e96537 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -15,12 +15,66 @@ #include <asm/e820/api.h> #include <asm/efi.h> #include <asm/uv/uv.h> +#include <asm/cpu_device_id.h> #define EFI_MIN_RESERVE 5120 #define EFI_DUMMY_GUID \ EFI_GUID(0x4424ac57, 0xbe4b, 0x47dd, 0x9e, 0x97, 0xed, 0x50, 0xf0, 0x9f, 0x92, 0xa9) +#define QUARK_CSH_SIGNATURE 0x5f435348 /* _CSH */ +#define QUARK_SECURITY_HEADER_SIZE 0x400 + +/* + * Header prepended to the standard EFI capsule on Quark systems the are based + * on Intel firmware BSP. + * @csh_signature: Unique identifier to sanity check signed module + * presence ("_CSH"). + * @version: Current version of CSH used. Should be one for Quark A0. + * @modulesize: Size of the entire module including the module header + * and payload. + * @security_version_number_index: Index of SVN to use for validation of signed + * module. + * @security_version_number: Used to prevent against roll back of modules. + * @rsvd_module_id: Currently unused for Clanton (Quark). + * @rsvd_module_vendor: Vendor Identifier. For Intel products value is + * 0x00008086. + * @rsvd_date: BCD representation of build date as yyyymmdd, where + * yyyy=4 digit year, mm=1-12, dd=1-31. + * @headersize: Total length of the header including including any + * padding optionally added by the signing tool. + * @hash_algo: What Hash is used in the module signing. + * @cryp_algo: What Crypto is used in the module signing. + * @keysize: Total length of the key data including including any + * padding optionally added by the signing tool. + * @signaturesize: Total length of the signature including including any + * padding optionally added by the signing tool. + * @rsvd_next_header: 32-bit pointer to the next Secure Boot Module in the + * chain, if there is a next header. + * @rsvd: Reserved, padding structure to required size. + * + * See also QuartSecurityHeader_t in + * Quark_EDKII_v1.2.1.1/QuarkPlatformPkg/Include/QuarkBootRom.h + * from https://downloadcenter.intel.com/download/23197/Intel-Quark-SoC-X1000-Board-Support-Package-BSP + */ +struct quark_security_header { + u32 csh_signature; + u32 version; + u32 modulesize; + u32 security_version_number_index; + u32 security_version_number; + u32 rsvd_module_id; + u32 rsvd_module_vendor; + u32 rsvd_date; + u32 headersize; + u32 hash_algo; + u32 cryp_algo; + u32 keysize; + u32 signaturesize; + u32 rsvd_next_header; + u32 rsvd[2]; +}; + static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; static bool efi_no_storage_paranoia; @@ -504,3 +558,86 @@ bool efi_poweroff_required(void) { return acpi_gbl_reduced_hardware || acpi_no_s5; } + +#ifdef CONFIG_EFI_CAPSULE_QUIRK_QUARK_CSH + +static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff, + size_t hdr_bytes) +{ + struct quark_security_header *csh = *pkbuff; + + /* Only process data block that is larger than the security header */ + if (hdr_bytes < sizeof(struct quark_security_header)) + return 0; + + if (csh->csh_signature != QUARK_CSH_SIGNATURE || + csh->headersize != QUARK_SECURITY_HEADER_SIZE) + return 1; + + /* Only process data block if EFI header is included */ + if (hdr_bytes < QUARK_SECURITY_HEADER_SIZE + + sizeof(efi_capsule_header_t)) + return 0; + + pr_debug("Quark security header detected\n"); + + if (csh->rsvd_next_header != 0) { + pr_err("multiple Quark security headers not supported\n"); + return -EINVAL; + } + + *pkbuff += csh->headersize; + cap_info->total_size = csh->headersize; + + /* + * Update the first page pointer to skip over the CSH header. + */ + cap_info->pages[0] += csh->headersize; + + return 1; +} + +#define ICPU(family, model, quirk_handler) \ + { X86_VENDOR_INTEL, family, model, X86_FEATURE_ANY, \ + (unsigned long)&quirk_handler } + +static const struct x86_cpu_id efi_capsule_quirk_ids[] = { + ICPU(5, 9, qrk_capsule_setup_info), /* Intel Quark X1000 */ + { } +}; + +int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff, + size_t hdr_bytes) +{ + int (*quirk_handler)(struct capsule_info *, void **, size_t); + const struct x86_cpu_id *id; + int ret; + + if (hdr_bytes < sizeof(efi_capsule_header_t)) + return 0; + + cap_info->total_size = 0; + + id = x86_match_cpu(efi_capsule_quirk_ids); + if (id) { + /* + * The quirk handler is supposed to return + * - a value > 0 if the setup should continue, after advancing + * kbuff as needed + * - 0 if not enough hdr_bytes are available yet + * - a negative error code otherwise + */ + quirk_handler = (typeof(quirk_handler))id->driver_data; + ret = quirk_handler(cap_info, &kbuff, hdr_bytes); + if (ret <= 0) + return ret; + } + + memcpy(&cap_info->header, kbuff, sizeof(cap_info->header)); + + cap_info->total_size += cap_info->header.imagesize; + + return __efi_capsule_setup_info(cap_info); +} + +#endif diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 42e65fee5673..795671593528 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -456,12 +456,13 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) */ static inline unsigned long long cycles_2_ns(unsigned long long cyc) { - struct cyc2ns_data *data = cyc2ns_read_begin(); + struct cyc2ns_data data; unsigned long long ns; - ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); + cyc2ns_read_begin(&data); + ns = mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift); + cyc2ns_read_end(); - cyc2ns_read_end(data); return ns; } @@ -470,12 +471,13 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) */ static inline unsigned long long ns_2_cycles(unsigned long long ns) { - struct cyc2ns_data *data = cyc2ns_read_begin(); + struct cyc2ns_data data; unsigned long long cyc; - cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul; + cyc2ns_read_begin(&data); + cyc = (ns << data.cyc2ns_shift) / data.cyc2ns_mul; + cyc2ns_read_end(); - cyc2ns_read_end(data); return cyc; } diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index a6a198c33623..05041871ac90 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile @@ -1,3 +1,5 @@ +OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y + # __restore_processor_state() restores %gs after S3 resume and so should not # itself be stack-protected nostackp := $(call cc-option, -fno-stack-protector) diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index fffb0a16f9e3..bced7a369a11 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,3 +1,6 @@ +OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y +OBJECT_FILES_NON_STANDARD_xen-pvh.o := y + ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_spinlock.o = -pg diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 30bb2e80cfe7..a18703be9ead 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -54,38 +54,6 @@ static efi_system_table_t efi_systab_xen __initdata = { .tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */ }; -static const struct efi efi_xen __initconst = { - .systab = NULL, /* Initialized later. */ - .runtime_version = 0, /* Initialized later. */ - .mps = EFI_INVALID_TABLE_ADDR, - .acpi = EFI_INVALID_TABLE_ADDR, - .acpi20 = EFI_INVALID_TABLE_ADDR, - .smbios = EFI_INVALID_TABLE_ADDR, - .smbios3 = EFI_INVALID_TABLE_ADDR, - .sal_systab = EFI_INVALID_TABLE_ADDR, - .boot_info = EFI_INVALID_TABLE_ADDR, - .hcdp = EFI_INVALID_TABLE_ADDR, - .uga = EFI_INVALID_TABLE_ADDR, - .uv_systab = EFI_INVALID_TABLE_ADDR, - .fw_vendor = EFI_INVALID_TABLE_ADDR, - .runtime = EFI_INVALID_TABLE_ADDR, - .config_table = EFI_INVALID_TABLE_ADDR, - .get_time = xen_efi_get_time, - .set_time = xen_efi_set_time, - .get_wakeup_time = xen_efi_get_wakeup_time, - .set_wakeup_time = xen_efi_set_wakeup_time, - .get_variable = xen_efi_get_variable, - .get_next_variable = xen_efi_get_next_variable, - .set_variable = xen_efi_set_variable, - .query_variable_info = xen_efi_query_variable_info, - .update_capsule = xen_efi_update_capsule, - .query_capsule_caps = xen_efi_query_capsule_caps, - .get_next_high_mono_count = xen_efi_get_next_high_mono_count, - .reset_system = xen_efi_reset_system, - .set_virtual_address_map = NULL, /* Not used under Xen. */ - .flags = 0 /* Initialized later. */ -}; - static efi_system_table_t __init *xen_efi_probe(void) { struct xen_platform_op op = { @@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void) /* Here we know that Xen runs on EFI platform. */ - efi = efi_xen; + efi.get_time = xen_efi_get_time; + efi.set_time = xen_efi_set_time; + efi.get_wakeup_time = xen_efi_get_wakeup_time; + efi.set_wakeup_time = xen_efi_set_wakeup_time; + efi.get_variable = xen_efi_get_variable; + efi.get_next_variable = xen_efi_get_next_variable; + efi.set_variable = xen_efi_set_variable; + efi.query_variable_info = xen_efi_query_variable_info; + efi.update_capsule = xen_efi_update_capsule; + efi.query_capsule_caps = xen_efi_query_capsule_caps; + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; + efi.reset_system = xen_efi_reset_system; efi_systab_xen.tables = info->cfg.addr; efi_systab_xen.nr_tables = info->cfg.nent; diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 003eeee3fbc6..30ee8c608853 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -213,8 +213,6 @@ struct mm_struct; #define release_segments(mm) do { } while(0) #define forget_segments() do { } while (0) -#define thread_saved_pc(tsk) (task_pt_regs(tsk)->pc) - extern unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) |