diff options
78 files changed, 864 insertions, 541 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 35fe7ae0492e..8d941d6818cd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5505,7 +5505,8 @@ S: Supported F: drivers/idle/intel_idle.c INTEL PSTATE DRIVER -M: Kristen Carlson Accardi <kristen@linux.intel.com> +M: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> +M: Len Brown <lenb@kernel.org> L: linux-pm@vger.kernel.org S: Supported F: drivers/cpufreq/intel_pstate.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 851fe11c6069..9ac16a482ff1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -27,6 +27,7 @@ config ARM64 select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS select EDAC_SUPPORT + select FRAME_POINTER select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index c24d6adc0420..04fb73b973f1 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -2,10 +2,6 @@ menu "Kernel hacking" source "lib/Kconfig.debug" -config FRAME_POINTER - bool - default y - config ARM64_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 2f71f9cdd39c..bdd7aa358d2a 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -224,3 +224,4 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_CE_CCM=y CONFIG_CRYPTO_AES_ARM64_CE_BLK=y CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y +CONFIG_CRYPTO_CRC32_ARM64=y diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 74d0b8eb0799..f61c84f6ba02 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -233,7 +233,7 @@ __CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory") #undef __CMPXCHG_CASE #define __CMPXCHG_DBL(name, mb, rel, cl) \ -__LL_SC_INLINE int \ +__LL_SC_INLINE long \ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \ unsigned long old2, \ unsigned long new1, \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 1fce7908e690..197e06afbf71 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -387,7 +387,7 @@ __CMPXCHG_CASE(x, , mb_8, al, "memory") #define __LL_SC_CMPXCHG_DBL(op) __LL_SC_CALL(__cmpxchg_double##op) #define __CMPXCHG_DBL(name, mb, cl...) \ -static inline int __cmpxchg_double##name(unsigned long old1, \ +static inline long __cmpxchg_double##name(unsigned long old1, \ unsigned long old2, \ unsigned long new1, \ unsigned long new2, \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f3acf421ded4..9819a9426b69 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -80,6 +80,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 0cd7b5947dfc..2d4ca4bb0dd3 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -32,7 +32,7 @@ #ifndef __ASSEMBLY__ #include <linux/psci.h> -#include <asm/types.h> +#include <linux/types.h> #include <asm/ptrace.h> #define __KVM_HAVE_GUEST_DEBUG diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 52f0d7a5a1c2..c8cf89223b5a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -696,7 +696,7 @@ static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap) } /* Check if we have a particular HWCAP enabled */ -static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) +static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *cap) { bool rc; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 2bbdc0e4fd14..b1adc51b2c2e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -473,7 +473,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, * cpu logical map array containing MPIDR values related to logical * cpus. Assumes that cpu_logical_map(0) has already been initialized. */ -void __init of_parse_and_init_cpus(void) +static void __init of_parse_and_init_cpus(void) { struct device_node *dn = NULL; diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 40f7b33a22da..fce95e17cf7f 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -41,7 +41,7 @@ void notrace __cpu_suspend_save(struct cpu_suspend_ctx *ptr, * time the notifier runs debug exceptions might have been enabled already, * with HW breakpoints registers content still in an unknown state. */ -void (*hw_breakpoint_restore)(void *); +static void (*hw_breakpoint_restore)(void *); void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) { /* Prevent multiple restore hook initializations */ diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index f6fe17d88da5..b467fd0a384b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -15,6 +15,9 @@ ccflags-y := -shared -fno-common -fno-builtin ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +# Disable gcov profiling for VDSO code +GCOV_PROFILE := n + # Workaround for bare-metal (ELF) toolchains that neglect to pass -shared # down to collect2, resulting in silent corruption of the vDSO image. ccflags-y += -Wl,-shared diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c2fa6b56613c..e3f563c81c48 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -146,7 +146,7 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, if (((addr | next | phys) & ~CONT_MASK) == 0) { /* a block of CONT_PTES */ __populate_init_pte(pte, addr, next, phys, - prot | __pgprot(PTE_CONT)); + __pgprot(pgprot_val(prot) | PTE_CONT)); } else { /* * If the range being split is already inside of a @@ -165,7 +165,7 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (addr != end); } -void split_pud(pud_t *old_pud, pmd_t *pmd) +static void split_pud(pud_t *old_pud, pmd_t *pmd) { unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); @@ -447,7 +447,7 @@ static void __init map_mem(void) memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } -void __init fixup_executable(void) +static void __init fixup_executable(void) { #ifdef CONFIG_DEBUG_RODATA /* now that we are actually fully mapped, make the start/end more fine grained */ @@ -691,7 +691,7 @@ void __set_fixmap(enum fixed_addresses idx, void *__init fixmap_remap_fdt(phys_addr_t dt_phys) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); - pgprot_t prot = PAGE_KERNEL | PTE_RDONLY; + pgprot_t prot = PAGE_KERNEL_RO; int size, offset; void *dt_virt; diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 98a26ce82d26..aee5637ea436 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com> + * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -35,6 +35,7 @@ aarch64_insn_gen_comp_branch_imm(0, offset, Rt, A64_VARIANT(sf), \ AARCH64_INSN_BRANCH_COMP_##type) #define A64_CBZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, ZERO) +#define A64_CBNZ(sf, Rt, imm19) A64_COMP_BRANCH(sf, Rt, (imm19) << 2, NONZERO) /* Conditional branch (immediate) */ #define A64_COND_BRANCH(cond, offset) \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a44e5293c6f5..cf3c7d4a1b58 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1,7 +1,7 @@ /* * BPF JIT compiler for ARM64 * - * Copyright (C) 2014 Zi Shen Lim <zlim.lnx@gmail.com> + * Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -225,6 +225,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) u8 jmp_cond; s32 jmp_offset; +#define check_imm(bits, imm) do { \ + if ((((imm) > 0) && ((imm) >> (bits))) || \ + (((imm) < 0) && (~(imm) >> (bits)))) { \ + pr_info("[%2d] imm=%d(0x%x) out of range\n", \ + i, imm, imm); \ + return -EINVAL; \ + } \ +} while (0) +#define check_imm19(imm) check_imm(19, imm) +#define check_imm26(imm) check_imm(26, imm) + switch (code) { /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: @@ -258,15 +269,33 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit(A64_UDIV(is64, dst, dst, src), ctx); - break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - ctx->tmp_used = 1; - emit(A64_UDIV(is64, tmp, dst, src), ctx); - emit(A64_MUL(is64, tmp, tmp, src), ctx); - emit(A64_SUB(is64, dst, dst, tmp), ctx); + { + const u8 r0 = bpf2a64[BPF_REG_0]; + + /* if (src == 0) return 0 */ + jmp_offset = 3; /* skip ahead to else path */ + check_imm19(jmp_offset); + emit(A64_CBNZ(is64, src, jmp_offset), ctx); + emit(A64_MOVZ(1, r0, 0, 0), ctx); + jmp_offset = epilogue_offset(ctx); + check_imm26(jmp_offset); + emit(A64_B(jmp_offset), ctx); + /* else */ + switch (BPF_OP(code)) { + case BPF_DIV: + emit(A64_UDIV(is64, dst, dst, src), ctx); + break; + case BPF_MOD: + ctx->tmp_used = 1; + emit(A64_UDIV(is64, tmp, dst, src), ctx); + emit(A64_MUL(is64, tmp, tmp, src), ctx); + emit(A64_SUB(is64, dst, dst, tmp), ctx); + break; + } break; + } case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: emit(A64_LSLV(is64, dst, dst, src), ctx); @@ -393,17 +422,6 @@ emit_bswap_uxt: emit(A64_ASR(is64, dst, dst, imm), ctx); break; -#define check_imm(bits, imm) do { \ - if ((((imm) > 0) && ((imm) >> (bits))) || \ - (((imm) < 0) && (~(imm) >> (bits)))) { \ - pr_info("[%2d] imm=%d(0x%x) out of range\n", \ - i, imm, imm); \ - return -EINVAL; \ - } \ -} while (0) -#define check_imm19(imm) check_imm(19, imm) -#define check_imm26(imm) check_imm(26, imm) - /* JUMP off */ case BPF_JMP | BPF_JA: jmp_offset = bpf2a64_offset(i + off, i, ctx); diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index db589167838c..dd3ac75776ad 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -16,6 +16,7 @@ config H8300 select OF_EARLY_FLATTREE select HAVE_MEMBLOCK select HAVE_DMA_ATTRS + select CLKSRC_OF config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile index 0d2d96e52d9f..e1c02ca230cb 100644 --- a/arch/h8300/Makefile +++ b/arch/h8300/Makefile @@ -22,7 +22,9 @@ KBUILD_CFLAGS += -DUTS_SYSNAME=\"uClinux\" KBUILD_AFLAGS += $(aflags-y) LDFLAGS += $(ldflags-y) +ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := h8300-unknown-linux- +endif core-y += arch/$(ARCH)/kernel/ arch/$(ARCH)/mm/ ifneq '$(CONFIG_H8300_BUILTIN_DTB)' '""' diff --git a/arch/h8300/boot/compressed/Makefile b/arch/h8300/boot/compressed/Makefile index 87d03b7ee97e..d7bc3fa7f2c6 100644 --- a/arch/h8300/boot/compressed/Makefile +++ b/arch/h8300/boot/compressed/Makefile @@ -14,11 +14,12 @@ OBJECTS = $(obj)/head.o $(obj)/misc.o # in order to suppress error message. # CONFIG_MEMORY_START ?= 0x00400000 -CONFIG_BOOT_LINK_OFFSET ?= 0x00140000 +CONFIG_BOOT_LINK_OFFSET ?= 0x00280000 IMAGE_OFFSET := $(shell printf "0x%08x" $$(($(CONFIG_MEMORY_START)+$(CONFIG_BOOT_LINK_OFFSET)))) LIBGCC := $(shell $(CROSS-COMPILE)$(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) -LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -estartup $(obj)/vmlinux.lds +LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -estartup -T $(obj)/vmlinux.lds \ + --defsym output=$(CONFIG_MEMORY_START) $(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(LIBGCC) FORCE $(call if_changed,ld) diff --git a/arch/h8300/boot/compressed/head.S b/arch/h8300/boot/compressed/head.S index 74c0d8cc40ba..0436350c1df5 100644 --- a/arch/h8300/boot/compressed/head.S +++ b/arch/h8300/boot/compressed/head.S @@ -9,8 +9,8 @@ .section .text..startup,"ax" .global startup startup: + mov.l #startup, sp mov.l er0, er4 - mov.l er0, sp mov.l #__sbss, er0 mov.l #__ebss, er1 sub.l er0, er1 @@ -24,7 +24,7 @@ startup: bne 1b jsr @decompress_kernel mov.l er4, er0 - jmp @0x400000 + jmp @output .align 9 fake_headers_as_bzImage: diff --git a/arch/h8300/boot/compressed/misc.c b/arch/h8300/boot/compressed/misc.c index c4f2cfcb117b..6029c5351895 100644 --- a/arch/h8300/boot/compressed/misc.c +++ b/arch/h8300/boot/compressed/misc.c @@ -28,7 +28,7 @@ static unsigned long free_mem_end_ptr; extern char input_data[]; extern int input_len; -static unsigned char *output; +extern char output[]; #define HEAP_SIZE 0x10000 @@ -56,15 +56,10 @@ void *memcpy(void *dest, const void *src, size_t n) static void error(char *x) { - while (1) ; /* Halt */ } -#define STACK_SIZE (4096) -long user_stack[STACK_SIZE]; -long *stack_start = &user_stack[STACK_SIZE]; - void decompress_kernel(void) { free_mem_ptr = (unsigned long)&_end; diff --git a/arch/h8300/boot/compressed/vmlinux.lds b/arch/h8300/boot/compressed/vmlinux.lds index a0a3a0ed54ef..44fd209db88a 100644 --- a/arch/h8300/boot/compressed/vmlinux.lds +++ b/arch/h8300/boot/compressed/vmlinux.lds @@ -27,6 +27,6 @@ SECTIONS *(.bss*) . = ALIGN(0x4) ; __ebss = . ; - __end = . ; } + _end = . ; } diff --git a/arch/h8300/boot/dts/edosk2674.dts b/arch/h8300/boot/dts/edosk2674.dts index dfb5c102f8da..4ce9fa874a57 100644 --- a/arch/h8300/boot/dts/edosk2674.dts +++ b/arch/h8300/boot/dts/edosk2674.dts @@ -7,7 +7,7 @@ chosen { bootargs = "console=ttySC2,38400"; - stdout-path = <&sci2>; + stdout-path = &sci2; }; aliases { serial0 = &sci0; @@ -25,13 +25,13 @@ compatible = "renesas,h8s2678-pll-clock"; clocks = <&xclk>; #clock-cells = <0>; - reg = <0xfee03b 2>, <0xfee045 2>; + reg = <0xffff3b 1>, <0xffff45 1>; }; core_clk: core_clk { compatible = "renesas,h8300-div-clock"; clocks = <&pllclk>; #clock-cells = <0>; - reg = <0xfee03b 2>; + reg = <0xffff3b 1>; renesas,width = <3>; }; fclk: fclk { diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h index 1d09b2f2e0fe..bb837cded268 100644 --- a/arch/h8300/include/asm/io.h +++ b/arch/h8300/include/asm/io.h @@ -36,20 +36,20 @@ static inline void ctrl_outl(unsigned long b, unsigned long addr) *(volatile unsigned long *)addr = b; } -static inline void ctrl_bclr(int b, unsigned long addr) +static inline void ctrl_bclr(int b, unsigned char *addr) { if (__builtin_constant_p(b)) - __asm__("bclr %1,%0" : : "WU"(addr), "i"(b)); + __asm__("bclr %1,%0" : "+WU"(*addr): "i"(b)); else - __asm__("bclr %w1,%0" : : "WU"(addr), "r"(b)); + __asm__("bclr %w1,%0" : "+WU"(*addr): "r"(b)); } -static inline void ctrl_bset(int b, unsigned long addr) +static inline void ctrl_bset(int b, unsigned char *addr) { if (__builtin_constant_p(b)) - __asm__("bset %1,%0" : : "WU"(addr), "i"(b)); + __asm__("bset %1,%0" : "+WU"(*addr): "i"(b)); else - __asm__("bset %w1,%0" : : "WU"(addr), "r"(b)); + __asm__("bset %w1,%0" : "+WU"(*addr): "r"(b)); } #endif /* __KERNEL__ */ diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 544c30785ad4..b408fe660cf8 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -13,6 +13,12 @@ #ifdef __KERNEL__ +/* + * Size of kernel stack for each process. This must be a power of 2... + */ +#define THREAD_SIZE_ORDER 1 +#define THREAD_SIZE 8192 /* 2 pages */ + #ifndef __ASSEMBLY__ /* @@ -46,14 +52,6 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) - -/* - * Size of kernel stack for each process. This must be a power of 2... - */ -#define THREAD_SIZE_ORDER 1 -#define THREAD_SIZE 8192 /* 2 pages */ - - /* how to get the thread information struct from C */ static inline struct thread_info *current_thread_info(void) { diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 0fd1fe65c0b8..c772abe6d19c 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -29,6 +29,7 @@ #include <linux/clk-provider.h> #include <linux/memblock.h> #include <linux/screen_info.h> +#include <linux/clocksource.h> #include <asm/setup.h> #include <asm/irq.h> @@ -252,4 +253,5 @@ void __init calibrate_delay(void) void __init time_init(void) { of_clk_init(NULL); + clocksource_probe(); } diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 7c302dcf5249..cb5dfb02c88d 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -1,5 +1,6 @@ #include <asm-generic/vmlinux.lds.h> #include <asm/page.h> +#include <asm/thread_info.h> #define ROMTOP 0x000000 #define RAMTOP 0x400000 @@ -42,11 +43,10 @@ SECTIONS . = RAMTOP; _ramstart = .; #define ADDR(x) ROMEND -#else #endif _sdata = . ; __data_start = . ; - RW_DATA_SECTION(0,0,0) + RW_DATA_SECTION(0, PAGE_SIZE, THREAD_SIZE) #if defined(CONFIG_ROMKERNEL) #undef ADDR #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9c26c5a96ea2..54b45b73195f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2019,7 +2019,7 @@ static bool can_split_piggybacked_subcores(struct core_info *cip) return false; n_subcores += (cip->subcore_threads[sub] - 1) >> 1; } - if (n_subcores > 3 || large_sub < 0) + if (large_sub < 0 || !subcore_config_ok(n_subcores + 1, 2)) return false; /* diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b1dab8d1d885..3c6badcd53ef 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1749,7 +1749,8 @@ kvmppc_hdsi: beq 3f clrrdi r0, r4, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ - bne 1f /* if no SLB entry found */ + li r0, BOOK3S_INTERRUPT_DATA_SEGMENT + bne 7f /* if no SLB entry found */ 4: std r4, VCPU_FAULT_DAR(r9) stw r6, VCPU_FAULT_DSISR(r9) @@ -1768,14 +1769,15 @@ kvmppc_hdsi: cmpdi r3, -2 /* MMIO emulation; need instr word */ beq 2f - /* Synthesize a DSI for the guest */ + /* Synthesize a DSI (or DSegI) for the guest */ ld r4, VCPU_FAULT_DAR(r9) mr r6, r3 -1: mtspr SPRN_DAR, r4 +1: li r0, BOOK3S_INTERRUPT_DATA_STORAGE mtspr SPRN_DSISR, r6 +7: mtspr SPRN_DAR, r4 mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 - li r10, BOOK3S_INTERRUPT_DATA_STORAGE + mr r10, r0 bl kvmppc_msr_interrupt fast_interrupt_c_return: 6: ld r7, VCPU_CTR(r9) @@ -1823,7 +1825,8 @@ kvmppc_hisi: beq 3f clrrdi r0, r10, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ - bne 1f /* if no SLB entry found */ + li r0, BOOK3S_INTERRUPT_INST_SEGMENT + bne 7f /* if no SLB entry found */ 4: /* Search the hash table. */ mr r3, r9 /* vcpu pointer */ @@ -1840,11 +1843,12 @@ kvmppc_hisi: cmpdi r3, -1 /* handle in kernel mode */ beq guest_exit_cont - /* Synthesize an ISI for the guest */ + /* Synthesize an ISI (or ISegI) for the guest */ mr r11, r3 -1: mtspr SPRN_SRR0, r10 +1: li r0, BOOK3S_INTERRUPT_INST_STORAGE +7: mtspr SPRN_SRR0, r10 mtspr SPRN_SRR1, r11 - li r10, BOOK3S_INTERRUPT_INST_STORAGE + mr r10, r0 bl kvmppc_msr_interrupt b fast_interrupt_c_return diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9265196e877f..30cfd64295a0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -505,6 +505,7 @@ struct kvm_vcpu_arch { u32 virtual_tsc_mult; u32 virtual_tsc_khz; s64 ia32_tsc_adjust_msr; + u64 tsc_scaling_ratio; atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ unsigned nmi_pending; /* NMI queued after currently running handler */ @@ -777,7 +778,7 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); + void (*update_bp_intercept)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); @@ -844,7 +845,7 @@ struct kvm_x86_ops { int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); bool (*invpcid_supported)(void); - void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host); + void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -852,11 +853,9 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); - void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale); u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu); void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); - u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); @@ -923,17 +922,6 @@ struct kvm_arch_async_pf { extern struct kvm_x86_ops *kvm_x86_ops; -static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, - s64 adjustment) -{ - kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false); -} - -static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) -{ - kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true); -} - int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); @@ -986,10 +974,12 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); /* control of guest tsc rate supported? */ extern bool kvm_has_tsc_control; -/* minimum supported tsc_khz for guests */ -extern u32 kvm_min_guest_tsc_khz; /* maximum supported tsc_khz for guests */ extern u32 kvm_max_guest_tsc_khz; +/* number of bits of the fractional part of the TSC scaling ratio */ +extern u8 kvm_tsc_scaling_ratio_frac_bits; +/* maximum allowed value of TSC scaling ratio */ +extern u64 kvm_max_tsc_scaling_ratio; enum emulation_result { EMULATE_DONE, /* no further processing */ @@ -1235,6 +1225,9 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, void kvm_define_shared_msr(unsigned index, u32 msr); int kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); +u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); + unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index aa336ff3e03e..14c63c7e8337 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -73,6 +73,7 @@ #define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_XSAVES 0x00100000 #define SECONDARY_EXEC_PCOMMIT 0x00200000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 @@ -167,6 +168,8 @@ enum vmcs_field { VMWRITE_BITMAP = 0x00002028, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index b5d7640abc5d..8a4add8e4639 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -100,6 +100,7 @@ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ { SVM_EXIT_INTR, "interrupt" }, \ { SVM_EXIT_NMI, "nmi" }, \ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ecd4ea1d28a8..4d30b865be30 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1250,7 +1250,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline); /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ @@ -1318,7 +1318,7 @@ static void start_apic_timer(struct kvm_lapic *apic) local_irq_save(flags); now = apic->lapic_timer.timer.base->get_time(); - guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, rdtsc()); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); if (likely(tscdeadline > guest_tsc)) { ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7d85bcae3332..e7c2c1428a69 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3359,7 +3359,7 @@ exit: return reserved; } -int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) +int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) { u64 spte; bool reserved; @@ -3368,7 +3368,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) return RET_MMIO_PF_EMULATE; reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte); - if (unlikely(reserved)) + if (WARN_ON(reserved)) return RET_MMIO_PF_BUG; if (is_mmio_spte(spte)) { @@ -3392,17 +3392,7 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) */ return RET_MMIO_PF_RETRY; } -EXPORT_SYMBOL_GPL(handle_mmio_page_fault_common); - -static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, - u32 error_code, bool direct) -{ - int ret; - - ret = handle_mmio_page_fault_common(vcpu, addr, direct); - WARN_ON(ret == RET_MMIO_PF_BUG); - return ret; -} +EXPORT_SYMBOL_GPL(handle_mmio_page_fault); static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, bool prefault) @@ -3413,7 +3403,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code); if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gva, error_code, true); + r = handle_mmio_page_fault(vcpu, gva, true); if (likely(r != RET_MMIO_PF_INVALID)) return r; @@ -3503,7 +3493,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, gpa, error_code, true); + r = handle_mmio_page_fault(vcpu, gpa, true); if (likely(r != RET_MMIO_PF_INVALID)) return r; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e4202e41d535..55ffb7b0f95e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -56,13 +56,13 @@ void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); /* - * Return values of handle_mmio_page_fault_common: + * Return values of handle_mmio_page_fault: * RET_MMIO_PF_EMULATE: it is a real mmio page fault, emulate the instruction * directly. * RET_MMIO_PF_INVALID: invalid spte is detected then let the real page * fault path update the mmio spte. * RET_MMIO_PF_RETRY: let CPU fault again on the address. - * RET_MMIO_PF_BUG: bug is detected. + * RET_MMIO_PF_BUG: a bug was detected (and a WARN was printed). */ enum { RET_MMIO_PF_EMULATE = 1, @@ -71,7 +71,7 @@ enum { RET_MMIO_PF_BUG = -1 }; -int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); +int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index b41faa91a6f9..3058a22a658d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -705,8 +705,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); if (unlikely(error_code & PFERR_RSVD_MASK)) { - r = handle_mmio_page_fault(vcpu, addr, error_code, - mmu_is_nested(vcpu)); + r = handle_mmio_page_fault(vcpu, addr, mmu_is_nested(vcpu)); if (likely(r != RET_MMIO_PF_INVALID)) return r; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f2c8e4917688..83a1c643f9a5 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -158,8 +158,6 @@ struct vcpu_svm { unsigned long int3_rip; u32 apf_reason; - u64 tsc_ratio; - /* cached guest cpuid flags for faster access */ bool nrips_enabled : 1; }; @@ -214,7 +212,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); -static u64 __scale_tsc(u64 ratio, u64 tsc); enum { VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, @@ -894,20 +891,9 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_FFXSR); if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { - u64 max; - kvm_has_tsc_control = true; - - /* - * Make sure the user can only configure tsc_khz values that - * fit into a signed integer. - * A min value is not calculated needed because it will always - * be 1 on all machines and a value of 0 is used to disable - * tsc-scaling for the vcpu. - */ - max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); - - kvm_max_guest_tsc_khz = max; + kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; + kvm_tsc_scaling_ratio_frac_bits = 32; } if (nested) { @@ -971,68 +957,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static u64 __scale_tsc(u64 ratio, u64 tsc) -{ - u64 mult, frac, _tsc; - - mult = ratio >> 32; - frac = ratio & ((1ULL << 32) - 1); - - _tsc = tsc; - _tsc *= mult; - _tsc += (tsc >> 32) * frac; - _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; - - return _tsc; -} - -static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) -{ - struct vcpu_svm *svm = to_svm(vcpu); - u64 _tsc = tsc; - - if (svm->tsc_ratio != TSC_RATIO_DEFAULT) - _tsc = __scale_tsc(svm->tsc_ratio, tsc); - - return _tsc; -} - -static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) -{ - struct vcpu_svm *svm = to_svm(vcpu); - u64 ratio; - u64 khz; - - /* Guest TSC same frequency as host TSC? */ - if (!scale) { - svm->tsc_ratio = TSC_RATIO_DEFAULT; - return; - } - - /* TSC scaling supported? */ - if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { - if (user_tsc_khz > tsc_khz) { - vcpu->arch.tsc_catchup = 1; - vcpu->arch.tsc_always_catchup = 1; - } else - WARN(1, "user requested TSC rate below hardware speed\n"); - return; - } - - khz = user_tsc_khz; - - /* TSC scaling required - calculate ratio */ - ratio = khz << 32; - do_div(ratio, tsc_khz); - - if (ratio == 0 || ratio & TSC_RATIO_RSVD) { - WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", - user_tsc_khz); - return; - } - svm->tsc_ratio = ratio; -} - static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1059,16 +983,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) +static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { struct vcpu_svm *svm = to_svm(vcpu); - if (host) { - if (svm->tsc_ratio != TSC_RATIO_DEFAULT) - WARN_ON(adjustment < 0); - adjustment = svm_scale_tsc(vcpu, (u64)adjustment); - } - svm->vmcb->control.tsc_offset += adjustment; if (is_guest_mode(vcpu)) svm->nested.hsave->control.tsc_offset += adjustment; @@ -1080,15 +998,6 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) -{ - u64 tsc; - - tsc = svm_scale_tsc(vcpu, rdtsc()); - - return target_tsc - tsc; -} - static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1110,6 +1019,8 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, PF_VECTOR); set_exception_intercept(svm, UD_VECTOR); set_exception_intercept(svm, MC_VECTOR); + set_exception_intercept(svm, AC_VECTOR); + set_exception_intercept(svm, DB_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1235,8 +1146,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto out; } - svm->tsc_ratio = TSC_RATIO_DEFAULT; - err = kvm_vcpu_init(&svm->vcpu, kvm, id); if (err) goto free_svm; @@ -1322,10 +1231,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); - if (static_cpu_has(X86_FEATURE_TSCRATEMSR) && - svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) { - __this_cpu_write(current_tsc_ratio, svm->tsc_ratio); - wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio); + if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { + u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio; + if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) { + __this_cpu_write(current_tsc_ratio, tsc_ratio); + wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); + } } } @@ -1644,20 +1555,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, mark_dirty(svm->vmcb, VMCB_SEG); } -static void update_db_bp_intercept(struct kvm_vcpu *vcpu) +static void update_bp_intercept(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - clr_exception_intercept(svm, DB_VECTOR); clr_exception_intercept(svm, BP_VECTOR); - if (svm->nmi_singlestep) - set_exception_intercept(svm, DB_VECTOR); - if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - set_exception_intercept(svm, DB_VECTOR); if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) set_exception_intercept(svm, BP_VECTOR); } else @@ -1763,7 +1667,6 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_bp_intercept(&svm->vcpu); } if (svm->vcpu.guest_debug & @@ -1798,6 +1701,12 @@ static int ud_interception(struct vcpu_svm *svm) return 1; } +static int ac_interception(struct vcpu_svm *svm) +{ + kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0); + return 1; +} + static void svm_fpu_activate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3075,8 +2984,7 @@ static int cr8_write_interception(struct vcpu_svm *svm) static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); - return vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, host_tsc); + return vmcb->control.tsc_offset + host_tsc; } static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -3086,7 +2994,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr_info->index) { case MSR_IA32_TSC: { msr_info->data = svm->vmcb->control.tsc_offset + - svm_scale_tsc(vcpu, rdtsc()); + kvm_scale_tsc(vcpu, rdtsc()); break; } @@ -3362,6 +3270,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, + [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, @@ -3745,7 +3654,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) */ svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_bp_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -4371,7 +4279,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .update_db_bp_intercept = update_db_bp_intercept, + .update_bp_intercept = update_bp_intercept, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, @@ -4443,11 +4351,9 @@ static struct kvm_x86_ops svm_x86_ops = { .has_wbinvd_exit = svm_has_wbinvd_exit, - .set_tsc_khz = svm_set_tsc_khz, .read_tsc_offset = svm_read_tsc_offset, .write_tsc_offset = svm_write_tsc_offset, - .adjust_tsc_offset = svm_adjust_tsc_offset, - .compute_tsc_offset = svm_compute_tsc_offset, + .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest, .read_l1_tsc = svm_read_l1_tsc, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5eb56ed77c1f..87acc5221740 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -107,6 +107,8 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL + #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ @@ -1172,6 +1174,12 @@ static inline bool cpu_has_vmx_pml(void) return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML; } +static inline bool cpu_has_vmx_tsc_scaling(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_TSC_SCALING; +} + static inline bool report_flexpriority(void) { return flexpriority_enabled; @@ -1631,7 +1639,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) u32 eb; eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | - (1u << NM_VECTOR) | (1u << DB_VECTOR); + (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) @@ -2053,6 +2061,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ + + /* Setup TSC multiplier */ + if (cpu_has_vmx_tsc_scaling()) + vmcs_write64(TSC_MULTIPLIER, + vcpu->arch.tsc_scaling_ratio); + vmx->loaded_vmcs->cpu = cpu; } @@ -2357,15 +2371,16 @@ static void setup_msrs(struct vcpu_vmx *vmx) /* * reads and returns guest's timestamp counter "register" - * guest_tsc = host_tsc + tsc_offset -- 21.3 + * guest_tsc = (host_tsc * tsc multiplier) >> 48 + tsc_offset + * -- Intel TSC Scaling for Virtualization White Paper, sec 1.3 */ -static u64 guest_read_tsc(void) +static u64 guest_read_tsc(struct kvm_vcpu *vcpu) { u64 host_tsc, tsc_offset; host_tsc = rdtsc(); tsc_offset = vmcs_read64(TSC_OFFSET); - return host_tsc + tsc_offset; + return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset; } /* @@ -2382,22 +2397,6 @@ static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) return host_tsc + tsc_offset; } -/* - * Engage any workarounds for mis-matched TSC rates. Currently limited to - * software catchup for faster rates on slower CPUs. - */ -static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) -{ - if (!scale) - return; - - if (user_tsc_khz > tsc_khz) { - vcpu->arch.tsc_catchup = 1; - vcpu->arch.tsc_always_catchup = 1; - } else - WARN(1, "user requested TSC rate below hardware speed\n"); -} - static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu) { return vmcs_read64(TSC_OFFSET); @@ -2429,7 +2428,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) } } -static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) +static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { u64 offset = vmcs_read64(TSC_OFFSET); @@ -2442,11 +2441,6 @@ static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool ho offset + adjustment); } -static u64 vmx_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) -{ - return target_tsc - rdtsc(); -} - static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); @@ -2778,7 +2772,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_EFER: return kvm_get_msr_common(vcpu, msr_info); case MSR_IA32_TSC: - msr_info->data = guest_read_tsc(); + msr_info->data = guest_read_tsc(vcpu); break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); @@ -3154,7 +3148,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_SHADOW_VMCS | SECONDARY_EXEC_XSAVES | SECONDARY_EXEC_ENABLE_PML | - SECONDARY_EXEC_PCOMMIT; + SECONDARY_EXEC_PCOMMIT | + SECONDARY_EXEC_TSC_SCALING; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -5266,6 +5261,9 @@ static int handle_exception(struct kvm_vcpu *vcpu) return handle_rmode_exception(vcpu, ex_no, error_code); switch (ex_no) { + case AC_VECTOR: + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); + return 1; case DB_VECTOR: dr6 = vmcs_readl(EXIT_QUALIFICATION); if (!(vcpu->guest_debug & @@ -5908,7 +5906,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) return 1; } - ret = handle_mmio_page_fault_common(vcpu, gpa, true); + ret = handle_mmio_page_fault(vcpu, gpa, true); if (likely(ret == RET_MMIO_PF_EMULATE)) return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == EMULATE_DONE; @@ -6199,6 +6197,12 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_apicv()) enable_apicv = 0; + if (cpu_has_vmx_tsc_scaling()) { + kvm_has_tsc_control = true; + kvm_max_tsc_scaling_ratio = KVM_VMX_TSC_MULTIPLIER_MAX; + kvm_tsc_scaling_ratio_frac_bits = 48; + } + if (enable_apicv) kvm_x86_ops->update_cr8_intercept = NULL; else { @@ -8008,6 +8012,9 @@ static void dump_vmcs(void) vmcs_read32(IDT_VECTORING_INFO_FIELD), vmcs_read32(IDT_VECTORING_ERROR_CODE)); pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); + if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) + pr_err("TSC Multiplier = 0x%016lx\n", + vmcs_readl(TSC_MULTIPLIER)); if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) @@ -10752,7 +10759,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, - .update_db_bp_intercept = update_exception_bitmap, + .update_bp_intercept = update_exception_bitmap, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, @@ -10826,11 +10833,9 @@ static struct kvm_x86_ops vmx_x86_ops = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - .set_tsc_khz = vmx_set_tsc_khz, .read_tsc_offset = vmx_read_tsc_offset, .write_tsc_offset = vmx_write_tsc_offset, - .adjust_tsc_offset = vmx_adjust_tsc_offset, - .compute_tsc_offset = vmx_compute_tsc_offset, + .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest, .read_l1_tsc = vmx_read_l1_tsc, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a6eff166fc6..00462bd63129 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -93,10 +93,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); -struct kvm_x86_ops *kvm_x86_ops; +struct kvm_x86_ops *kvm_x86_ops __read_mostly; EXPORT_SYMBOL_GPL(kvm_x86_ops); -static bool ignore_msrs = 0; +static bool __read_mostly ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); unsigned int min_timer_period_us = 500; @@ -105,20 +105,25 @@ module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); static bool __read_mostly kvmclock_periodic_sync = true; module_param(kvmclock_periodic_sync, bool, S_IRUGO); -bool kvm_has_tsc_control; +bool __read_mostly kvm_has_tsc_control; EXPORT_SYMBOL_GPL(kvm_has_tsc_control); -u32 kvm_max_guest_tsc_khz; +u32 __read_mostly kvm_max_guest_tsc_khz; EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz); +u8 __read_mostly kvm_tsc_scaling_ratio_frac_bits; +EXPORT_SYMBOL_GPL(kvm_tsc_scaling_ratio_frac_bits); +u64 __read_mostly kvm_max_tsc_scaling_ratio; +EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio); +static u64 __read_mostly kvm_default_tsc_scaling_ratio; /* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */ -static u32 tsc_tolerance_ppm = 250; +static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); /* lapic timer advance (tscdeadline mode only) in nanoseconds */ -unsigned int lapic_timer_advance_ns = 0; +unsigned int __read_mostly lapic_timer_advance_ns = 0; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); -static bool backwards_tsc_observed = false; +static bool __read_mostly backwards_tsc_observed = false; #define KVM_NR_SHARED_MSRS 16 @@ -1249,14 +1254,53 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm) return v; } -static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) +static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) +{ + u64 ratio; + + /* Guest TSC same frequency as host TSC? */ + if (!scale) { + vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; + return 0; + } + + /* TSC scaling supported? */ + if (!kvm_has_tsc_control) { + if (user_tsc_khz > tsc_khz) { + vcpu->arch.tsc_catchup = 1; + vcpu->arch.tsc_always_catchup = 1; + return 0; + } else { + WARN(1, "user requested TSC rate below hardware speed\n"); + return -1; + } + } + + /* TSC scaling required - calculate ratio */ + ratio = mul_u64_u32_div(1ULL << kvm_tsc_scaling_ratio_frac_bits, + user_tsc_khz, tsc_khz); + + if (ratio == 0 || ratio >= kvm_max_tsc_scaling_ratio) { + WARN_ONCE(1, "Invalid TSC scaling ratio - virtual-tsc-khz=%u\n", + user_tsc_khz); + return -1; + } + + vcpu->arch.tsc_scaling_ratio = ratio; + return 0; +} + +static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) { u32 thresh_lo, thresh_hi; int use_scaling = 0; /* tsc_khz can be zero if TSC calibration fails */ - if (this_tsc_khz == 0) - return; + if (this_tsc_khz == 0) { + /* set tsc_scaling_ratio to a safe value */ + vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio; + return -1; + } /* Compute a scale to convert nanoseconds in TSC cycles */ kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000, @@ -1276,7 +1320,7 @@ static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz) pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi); use_scaling = 1; } - kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling); + return set_tsc_khz(vcpu, this_tsc_khz, use_scaling); } static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns) @@ -1322,6 +1366,48 @@ static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } +/* + * Multiply tsc by a fixed point number represented by ratio. + * + * The most significant 64-N bits (mult) of ratio represent the + * integral part of the fixed point number; the remaining N bits + * (frac) represent the fractional part, ie. ratio represents a fixed + * point number (mult + frac * 2^(-N)). + * + * N equals to kvm_tsc_scaling_ratio_frac_bits. + */ +static inline u64 __scale_tsc(u64 ratio, u64 tsc) +{ + return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits); +} + +u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) +{ + u64 _tsc = tsc; + u64 ratio = vcpu->arch.tsc_scaling_ratio; + + if (ratio != kvm_default_tsc_scaling_ratio) + _tsc = __scale_tsc(ratio, tsc); + + return _tsc; +} +EXPORT_SYMBOL_GPL(kvm_scale_tsc); + +static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) +{ + u64 tsc; + + tsc = kvm_scale_tsc(vcpu, rdtsc()); + + return target_tsc - tsc; +} + +u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) +{ + return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc)); +} +EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); + void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) { struct kvm *kvm = vcpu->kvm; @@ -1333,7 +1419,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) u64 data = msr->data; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); - offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); + offset = kvm_compute_tsc_offset(vcpu, data); ns = get_kernel_ns(); elapsed = ns - kvm->arch.last_tsc_nsec; @@ -1390,7 +1476,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) } else { u64 delta = nsec_to_cycles(vcpu, elapsed); data += delta; - offset = kvm_x86_ops->compute_tsc_offset(vcpu, data); + offset = kvm_compute_tsc_offset(vcpu, data); pr_debug("kvm: adjusted tsc offset by %llu\n", delta); } matched = true; @@ -1447,6 +1533,20 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) EXPORT_SYMBOL_GPL(kvm_write_tsc); +static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, + s64 adjustment) +{ + kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); +} + +static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) +{ + if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) + WARN_ON(adjustment < 0); + adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); + kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); +} + #ifdef CONFIG_X86_64 static cycle_t read_tsc(void) @@ -1608,7 +1708,7 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static int kvm_guest_time_update(struct kvm_vcpu *v) { - unsigned long flags, this_tsc_khz; + unsigned long flags, this_tsc_khz, tgt_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; s64 kernel_ns; @@ -1645,7 +1745,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = get_kernel_ns(); } - tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc); + tsc_timestamp = kvm_read_l1_tsc(v, host_tsc); /* * We may have to catch up the TSC to match elapsed wall clock @@ -1671,7 +1771,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) return 0; if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { - kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, + tgt_tsc_khz = kvm_has_tsc_control ? + vcpu->virtual_tsc_khz : this_tsc_khz; + kvm_get_time_scale(NSEC_PER_SEC / 1000, tgt_tsc_khz, &vcpu->hv_clock.tsc_shift, &vcpu->hv_clock.tsc_to_system_mul); vcpu->hw_tsc_khz = this_tsc_khz; @@ -2617,7 +2719,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); if (check_tsc_unstable()) { - u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu, + u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_x86_ops->write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; @@ -3319,9 +3421,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (user_tsc_khz == 0) user_tsc_khz = tsc_khz; - kvm_set_tsc_khz(vcpu, user_tsc_khz); + if (!kvm_set_tsc_khz(vcpu, user_tsc_khz)) + r = 0; - r = 0; goto out; } case KVM_GET_TSC_KHZ: { @@ -6452,8 +6554,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (hw_breakpoint_active()) hw_breakpoint_restore(); - vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, - rdtsc()); + vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); @@ -7015,7 +7116,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, */ kvm_set_rflags(vcpu, rflags); - kvm_x86_ops->update_db_bp_intercept(vcpu); + kvm_x86_ops->update_bp_intercept(vcpu); r = 0; @@ -7364,6 +7465,20 @@ int kvm_arch_hardware_setup(void) if (r != 0) return r; + if (kvm_has_tsc_control) { + /* + * Make sure the user can only configure tsc_khz values that + * fit into a signed integer. + * A min value is not calculated needed because it will always + * be 1 on all machines. + */ + u64 max = min(0x7fffffffULL, + __scale_tsc(kvm_max_tsc_scaling_ratio, tsc_khz)); + kvm_max_guest_tsc_khz = max; + + kvm_default_tsc_scaling_ratio = 1ULL << kvm_tsc_scaling_ratio_frac_bits; + } + kvm_init_msr_list(); return 0; } diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 06a67d5f2846..296b7a14893a 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -103,7 +103,12 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) pdevinfo.res = resources; pdevinfo.num_res = count; pdevinfo.fwnode = acpi_fwnode_handle(adev); - pdevinfo.dma_mask = acpi_check_dma(adev, NULL) ? DMA_BIT_MASK(32) : 0; + + if (acpi_dma_supported(adev)) + pdevinfo.dma_mask = DMA_BIT_MASK(32); + else + pdevinfo.dma_mask = 0; + pdev = platform_device_register_full(&pdevinfo); if (IS_ERR(pdev)) dev_err(&adev->dev, "platform device creation failed: %ld\n", diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 5778e8e4313a..3405f7a41e25 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -77,6 +77,12 @@ module_param(allow_duplicates, bool, 0644); static int disable_backlight_sysfs_if = -1; module_param(disable_backlight_sysfs_if, int, 0444); +static bool device_id_scheme = false; +module_param(device_id_scheme, bool, 0444); + +static bool only_lcd = false; +module_param(only_lcd, bool, 0444); + static int register_count; static DEFINE_MUTEX(register_count_mutex); static struct mutex video_list_lock; @@ -394,6 +400,18 @@ static int video_disable_backlight_sysfs_if( return 0; } +static int video_set_device_id_scheme(const struct dmi_system_id *d) +{ + device_id_scheme = true; + return 0; +} + +static int video_enable_only_lcd(const struct dmi_system_id *d) +{ + only_lcd = true; + return 0; +} + static struct dmi_system_id video_dmi_table[] = { /* * Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121 @@ -455,6 +473,33 @@ static struct dmi_system_id video_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE R830"), }, }, + /* + * Some machine's _DOD IDs don't have bit 31(Device ID Scheme) set + * but the IDs actually follow the Device ID Scheme. + */ + { + /* https://bugzilla.kernel.org/show_bug.cgi?id=104121 */ + .callback = video_set_device_id_scheme, + .ident = "ESPRIMO Mobile M9410", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile M9410"), + }, + }, + /* + * Some machines have multiple video output devices, but only the one + * that is the type of LCD can do the backlight control so we should not + * register backlight interface for other video output devices. + */ + { + /* https://bugzilla.kernel.org/show_bug.cgi?id=104121 */ + .callback = video_enable_only_lcd, + .ident = "ESPRIMO Mobile M9410", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), + DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO Mobile M9410"), + }, + }, {} }; @@ -1003,7 +1048,7 @@ acpi_video_bus_get_one_device(struct acpi_device *device, attribute = acpi_video_get_device_attr(video, device_id); - if (attribute && attribute->device_id_scheme) { + if (attribute && (attribute->device_id_scheme || device_id_scheme)) { switch (attribute->display_type) { case ACPI_VIDEO_DISPLAY_CRT: data->flags.crt = 1; @@ -1568,15 +1613,6 @@ static void acpi_video_dev_register_backlight(struct acpi_video_device *device) static int count; char *name; - /* - * Do not create backlight device for video output - * device that is not in the enumerated list. - */ - if (!acpi_video_device_in_dod(device)) { - dev_dbg(&device->dev->dev, "not in _DOD list, ignore\n"); - return; - } - result = acpi_video_init_brightness(device); if (result) return; @@ -1657,6 +1693,22 @@ static void acpi_video_run_bcl_for_osi(struct acpi_video_bus *video) mutex_unlock(&video->device_list_lock); } +static bool acpi_video_should_register_backlight(struct acpi_video_device *dev) +{ + /* + * Do not create backlight device for video output + * device that is not in the enumerated list. + */ + if (!acpi_video_device_in_dod(dev)) { + dev_dbg(&dev->dev->dev, "not in _DOD list, ignore\n"); + return false; + } + + if (only_lcd) + return dev->flags.lcd; + return true; +} + static int acpi_video_bus_register_backlight(struct acpi_video_bus *video) { struct acpi_video_device *dev; @@ -1670,8 +1722,10 @@ static int acpi_video_bus_register_backlight(struct acpi_video_bus *video) return 0; mutex_lock(&video->device_list_lock); - list_for_each_entry(dev, &video->video_device_list, entry) - acpi_video_dev_register_backlight(dev); + list_for_each_entry(dev, &video->video_device_list, entry) { + if (acpi_video_should_register_backlight(dev)) + acpi_video_dev_register_backlight(dev); + } mutex_unlock(&video->device_list_lock); video->backlight_registered = true; diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 1470ae4f98c0..5ea5dc219f56 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -168,7 +168,7 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) struct list_head *physnode_list; unsigned int node_id; int retval = -EINVAL; - bool coherent; + enum dev_dma_attr attr; if (has_acpi_companion(dev)) { if (acpi_dev) { @@ -225,8 +225,10 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) if (!has_acpi_companion(dev)) ACPI_COMPANION_SET(dev, acpi_dev); - if (acpi_check_dma(acpi_dev, &coherent)) - arch_setup_dma_ops(dev, 0, 0, NULL, coherent); + attr = acpi_get_dma_attr(acpi_dev); + if (attr != DEV_DMA_NOT_SUPPORTED) + arch_setup_dma_ops(dev, 0, 0, NULL, + attr == DEV_DMA_COHERENT); acpi_physnode_link_name(physical_node_name, node_id); retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index daf9fc8329e6..78d5f02a073b 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1308,6 +1308,48 @@ void acpi_free_pnp_ids(struct acpi_device_pnp *pnp) kfree(pnp->unique_id); } +/** + * acpi_dma_supported - Check DMA support for the specified device. + * @adev: The pointer to acpi device + * + * Return false if DMA is not supported. Otherwise, return true + */ +bool acpi_dma_supported(struct acpi_device *adev) +{ + if (!adev) + return false; + + if (adev->flags.cca_seen) + return true; + + /* + * Per ACPI 6.0 sec 6.2.17, assume devices can do cache-coherent + * DMA on "Intel platforms". Presumably that includes all x86 and + * ia64, and other arches will set CONFIG_ACPI_CCA_REQUIRED=y. + */ + if (!IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED)) + return true; + + return false; +} + +/** + * acpi_get_dma_attr - Check the supported DMA attr for the specified device. + * @adev: The pointer to acpi device + * + * Return enum dev_dma_attr. + */ +enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + if (!acpi_dma_supported(adev)) + return DEV_DMA_NOT_SUPPORTED; + + if (adev->flags.coherent_dma) + return DEV_DMA_COHERENT; + else + return DEV_DMA_NON_COHERENT; +} + static void acpi_init_coherency(struct acpi_device *adev) { unsigned long long cca = 0; diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 0d3a384b508a..daaf1c4e1e0f 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -233,6 +233,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, { + /* https://bugzilla.redhat.com/show_bug.cgi?id=1272633 */ + .callback = video_detect_force_video, + .ident = "Dell XPS14 L421X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS L421X"), + }, + }, + { /* https://bugzilla.redhat.com/show_bug.cgi?id=1163574 */ .callback = video_detect_force_video, .ident = "Dell XPS15 L521X", diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 80e298870388..e03b1ad25a90 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -321,8 +321,7 @@ static int genpd_poweroff(struct generic_pm_domain *genpd, bool is_async) if (stat > PM_QOS_FLAGS_NONE) return -EBUSY; - if (pdd->dev->driver && (!pm_runtime_suspended(pdd->dev) - || pdd->dev->power.irq_safe)) + if (!pm_runtime_suspended(pdd->dev) || pdd->dev->power.irq_safe) not_suspended++; } @@ -1312,13 +1311,17 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *subdomain) { - struct gpd_link *link; + struct gpd_link *link, *itr; int ret = 0; if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain) || genpd == subdomain) return -EINVAL; + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + mutex_lock(&genpd->lock); mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING); @@ -1328,18 +1331,13 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, goto out; } - list_for_each_entry(link, &genpd->master_links, master_node) { - if (link->slave == subdomain && link->master == genpd) { + list_for_each_entry(itr, &genpd->master_links, master_node) { + if (itr->slave == subdomain && itr->master == genpd) { ret = -EINVAL; goto out; } } - link = kzalloc(sizeof(*link), GFP_KERNEL); - if (!link) { - ret = -ENOMEM; - goto out; - } link->master = genpd; list_add_tail(&link->master_node, &genpd->master_links); link->slave = subdomain; @@ -1350,7 +1348,8 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, out: mutex_unlock(&subdomain->lock); mutex_unlock(&genpd->lock); - + if (ret) + kfree(link); return ret; } EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain); diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c index 270902007055..b8e76f75073b 100644 --- a/drivers/base/power/opp/core.c +++ b/drivers/base/power/opp/core.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/errno.h> #include <linux/err.h> #include <linux/slab.h> @@ -27,7 +29,7 @@ */ static LIST_HEAD(dev_opp_list); /* Lock to allow exclusive modification to the device and opp lists */ -static DEFINE_MUTEX(dev_opp_list_lock); +DEFINE_MUTEX(dev_opp_list_lock); #define opp_rcu_lockdep_assert() \ do { \ @@ -79,14 +81,18 @@ static struct device_opp *_managed_opp(const struct device_node *np) * Return: pointer to 'struct device_opp' if found, otherwise -ENODEV or * -EINVAL based on type of error. * - * Locking: This function must be called under rcu_read_lock(). device_opp - * is a RCU protected pointer. This means that device_opp is valid as long - * as we are under RCU lock. + * Locking: For readers, this function must be called under rcu_read_lock(). + * device_opp is a RCU protected pointer, which means that device_opp is valid + * as long as we are under RCU lock. + * + * For Writers, this function must be called with dev_opp_list_lock held. */ struct device_opp *_find_device_opp(struct device *dev) { struct device_opp *dev_opp; + opp_rcu_lockdep_assert(); + if (IS_ERR_OR_NULL(dev)) { pr_err("%s: Invalid parameters\n", __func__); return ERR_PTR(-EINVAL); @@ -701,7 +707,7 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, } /** - * _opp_add_dynamic() - Allocate a dynamic OPP. + * _opp_add_v1() - Allocate a OPP based on v1 bindings. * @dev: device for which we do this operation * @freq: Frequency in Hz for this OPP * @u_volt: Voltage in uVolts for this OPP @@ -727,8 +733,8 @@ static int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, * Duplicate OPPs (both freq and volt are same) and !opp->available * -ENOMEM Memory allocation failure */ -static int _opp_add_dynamic(struct device *dev, unsigned long freq, - long u_volt, bool dynamic) +static int _opp_add_v1(struct device *dev, unsigned long freq, long u_volt, + bool dynamic) { struct device_opp *dev_opp; struct dev_pm_opp *new_opp; @@ -770,9 +776,10 @@ unlock: } /* TODO: Support multiple regulators */ -static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev) +static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev) { u32 microvolt[3] = {0}; + u32 val; int count, ret; /* Missing property isn't a problem, but an invalid entry is */ @@ -805,6 +812,9 @@ static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev) opp->u_volt_min = microvolt[1]; opp->u_volt_max = microvolt[2]; + if (!of_property_read_u32(opp->np, "opp-microamp", &val)) + opp->u_amp = val; + return 0; } @@ -869,13 +879,10 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) if (!of_property_read_u32(np, "clock-latency-ns", &val)) new_opp->clock_latency_ns = val; - ret = opp_get_microvolt(new_opp, dev); + ret = opp_parse_supplies(new_opp, dev); if (ret) goto free_opp; - if (!of_property_read_u32(new_opp->np, "opp-microamp", &val)) - new_opp->u_amp = val; - ret = _opp_add(dev, new_opp, dev_opp); if (ret) goto free_opp; @@ -939,7 +946,7 @@ unlock: */ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) { - return _opp_add_dynamic(dev, freq, u_volt, true); + return _opp_add_v1(dev, freq, u_volt, true); } EXPORT_SYMBOL_GPL(dev_pm_opp_add); @@ -1172,13 +1179,17 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) struct device_opp *dev_opp; int ret = 0, count = 0; + mutex_lock(&dev_opp_list_lock); + dev_opp = _managed_opp(opp_np); if (dev_opp) { /* OPPs are already managed */ if (!_add_list_dev(dev, dev_opp)) ret = -ENOMEM; + mutex_unlock(&dev_opp_list_lock); return ret; } + mutex_unlock(&dev_opp_list_lock); /* We have opp-list node now, iterate over it and add OPPs */ for_each_available_child_of_node(opp_np, np) { @@ -1196,15 +1207,20 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np) if (WARN_ON(!count)) return -ENOENT; + mutex_lock(&dev_opp_list_lock); + dev_opp = _find_device_opp(dev); if (WARN_ON(IS_ERR(dev_opp))) { ret = PTR_ERR(dev_opp); + mutex_unlock(&dev_opp_list_lock); goto free_table; } dev_opp->np = opp_np; dev_opp->shared_opp = of_property_read_bool(opp_np, "opp-shared"); + mutex_unlock(&dev_opp_list_lock); + return 0; free_table: @@ -1241,7 +1257,7 @@ static int _of_add_opp_table_v1(struct device *dev) unsigned long freq = be32_to_cpup(val++) * 1000; unsigned long volt = be32_to_cpup(val++); - if (_opp_add_dynamic(dev, freq, volt, false)) + if (_opp_add_v1(dev, freq, volt, false)) dev_warn(dev, "%s: Failed to add OPP %ld\n", __func__, freq); nr -= 2; diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c index 7654c5606307..7b445e88a0d5 100644 --- a/drivers/base/power/opp/cpu.c +++ b/drivers/base/power/opp/cpu.c @@ -10,6 +10,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/err.h> @@ -124,12 +127,12 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) struct device *dev; int cpu, ret = 0; - rcu_read_lock(); + mutex_lock(&dev_opp_list_lock); dev_opp = _find_device_opp(cpu_dev); if (IS_ERR(dev_opp)) { ret = -EINVAL; - goto out_rcu_read_unlock; + goto unlock; } for_each_cpu(cpu, cpumask) { @@ -150,10 +153,10 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) continue; } } -out_rcu_read_unlock: - rcu_read_unlock(); +unlock: + mutex_unlock(&dev_opp_list_lock); - return 0; + return ret; } EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus); diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h index dcb38f78dae4..7366b2aa8997 100644 --- a/drivers/base/power/opp/opp.h +++ b/drivers/base/power/opp/opp.h @@ -21,6 +21,9 @@ #include <linux/rculist.h> #include <linux/rcupdate.h> +/* Lock to allow exclusive modification to the device and opp lists */ +extern struct mutex dev_opp_list_lock; + /* * Internal data structure organization with the OPP layer library is as * follows: diff --git a/drivers/base/property.c b/drivers/base/property.c index de40623bbd8a..1325ff225cc4 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -598,18 +598,34 @@ unsigned int device_get_child_node_count(struct device *dev) } EXPORT_SYMBOL_GPL(device_get_child_node_count); -bool device_dma_is_coherent(struct device *dev) +bool device_dma_supported(struct device *dev) { - bool coherent = false; - + /* For DT, this is always supported. + * For ACPI, this depends on CCA, which + * is determined by the acpi_dma_supported(). + */ if (IS_ENABLED(CONFIG_OF) && dev->of_node) - coherent = of_dma_is_coherent(dev->of_node); - else - acpi_check_dma(ACPI_COMPANION(dev), &coherent); + return true; + + return acpi_dma_supported(ACPI_COMPANION(dev)); +} +EXPORT_SYMBOL_GPL(device_dma_supported); - return coherent; +enum dev_dma_attr device_get_dma_attr(struct device *dev) +{ + enum dev_dma_attr attr = DEV_DMA_NOT_SUPPORTED; + + if (IS_ENABLED(CONFIG_OF) && dev->of_node) { + if (of_dma_is_coherent(dev->of_node)) + attr = DEV_DMA_COHERENT; + else + attr = DEV_DMA_NON_COHERENT; + } else + attr = acpi_get_dma_attr(ACPI_COMPANION(dev)); + + return attr; } -EXPORT_SYMBOL_GPL(device_dma_is_coherent); +EXPORT_SYMBOL_GPL(device_get_dma_attr); /** * device_get_phy_mode - Get phy mode for given device diff --git a/drivers/clk/h8300/clk-div.c b/drivers/clk/h8300/clk-div.c index 1dd5d14d5dbe..d71d01157dbb 100644 --- a/drivers/clk/h8300/clk-div.c +++ b/drivers/clk/h8300/clk-div.c @@ -19,6 +19,7 @@ static void __init h8300_div_clk_setup(struct device_node *node) const char *parent_name; void __iomem *divcr = NULL; int width; + int offset; num_parents = of_clk_get_parent_count(node); if (num_parents < 1) { @@ -31,11 +32,14 @@ static void __init h8300_div_clk_setup(struct device_node *node) pr_err("%s: failed to map divide register", clk_name); goto error; } + offset = (unsigned long)divcr & 3; + offset = (3 - offset) * 8; + divcr = (void *)((unsigned long)divcr & ~3); parent_name = of_clk_get_parent_name(node, 0); of_property_read_u32(node, "renesas,width", &width); clk = clk_register_divider(NULL, clk_name, parent_name, - CLK_SET_RATE_GATE, divcr, 0, width, + CLK_SET_RATE_GATE, divcr, offset, width, CLK_DIVIDER_POWER_OF_TWO, &clklock); if (!IS_ERR(clk)) { of_clk_add_provider(node, of_clk_src_simple_get, clk); diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c index f1e42f8ce0fc..c5d256caa664 100644 --- a/drivers/cpufreq/arm_big_little.c +++ b/drivers/cpufreq/arm_big_little.c @@ -149,6 +149,19 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) __func__, cpu, old_cluster, new_cluster, new_rate); ret = clk_set_rate(clk[new_cluster], new_rate * 1000); + if (!ret) { + /* + * FIXME: clk_set_rate hasn't returned an error here however it + * may be that clk_change_rate failed due to hardware or + * firmware issues and wasn't able to report that due to the + * current design of the clk core layer. To work around this + * problem we will read back the clock rate and check it is + * correct. This needs to be removed once clk core is fixed. + */ + if (clk_get_rate(clk[new_cluster]) != new_rate * 1000) + ret = -EIO; + } + if (WARN_ON(ret)) { pr_err("clk_set_rate failed: %d, new cluster: %d\n", ret, new_cluster); @@ -189,15 +202,6 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate) mutex_unlock(&cluster_lock[old_cluster]); } - /* - * FIXME: clk_set_rate has to handle the case where clk_change_rate - * can fail due to hardware or firmware issues. Until the clk core - * layer is fixed, we can check here. In most of the cases we will - * be reading only the cached value anyway. This needs to be removed - * once clk core is fixed. - */ - if (bL_cpufreq_get_rate(cpu) != new_rate) - return -EIO; return 0; } diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 93c219fab850..e8cb334094b0 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -166,8 +166,7 @@ static int __init cppc_cpufreq_init(void) out: for_each_possible_cpu(i) - if (all_cpu_data[i]) - kfree(all_cpu_data[i]); + kfree(all_cpu_data[i]); kfree(all_cpu_data); return -ENODEV; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 11258c4c1b17..b260576ddb12 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -171,10 +171,6 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, { int i; - mutex_lock(&cpufreq_governor_lock); - if (!policy->governor_enabled) - goto out_unlock; - if (!all_cpus) { /* * Use raw_smp_processor_id() to avoid preemptible warnings. @@ -188,9 +184,6 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, for_each_cpu(i, policy->cpus) __gov_queue_work(i, dbs_data, delay); } - -out_unlock: - mutex_unlock(&cpufreq_governor_lock); } EXPORT_SYMBOL_GPL(gov_queue_work); @@ -229,13 +222,24 @@ static void dbs_timer(struct work_struct *work) struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, dwork.work); struct cpu_common_dbs_info *shared = cdbs->shared; - struct cpufreq_policy *policy = shared->policy; - struct dbs_data *dbs_data = policy->governor_data; + struct cpufreq_policy *policy; + struct dbs_data *dbs_data; unsigned int sampling_rate, delay; bool modify_all = true; mutex_lock(&shared->timer_mutex); + policy = shared->policy; + + /* + * Governor might already be disabled and there is no point continuing + * with the work-handler. + */ + if (!policy) + goto unlock; + + dbs_data = policy->governor_data; + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -252,6 +256,7 @@ static void dbs_timer(struct work_struct *work) delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); gov_queue_work(dbs_data, policy, delay, modify_all); +unlock: mutex_unlock(&shared->timer_mutex); } @@ -478,9 +483,17 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy, if (!shared || !shared->policy) return -EBUSY; + /* + * Work-handler must see this updated, as it should not proceed any + * further after governor is disabled. And so timer_mutex is taken while + * updating this value. + */ + mutex_lock(&shared->timer_mutex); + shared->policy = NULL; + mutex_unlock(&shared->timer_mutex); + gov_cancel_work(dbs_data, policy); - shared->policy = NULL; mutex_destroy(&shared->timer_mutex); return 0; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 93a3c635ea27..2e31d097def6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -684,8 +684,6 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { - pr_info("intel_pstate: HWP enabled\n"); - wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } @@ -1557,8 +1555,10 @@ static int __init intel_pstate_init(void) if (!all_cpu_data) return -ENOMEM; - if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) + if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) { + pr_info("intel_pstate: HWP enabled\n"); hwp_active++; + } if (!hwp_active && hwp_only) goto out; @@ -1593,8 +1593,10 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; - if (!strcmp(str, "no_hwp")) + if (!strcmp(str, "no_hwp")) { + pr_info("intel_pstate: HWP disabled\n"); no_hwp = 1; + } if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 9e231f52150c..051a8a8224cd 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -212,11 +212,11 @@ static void s5pv210_set_refresh(enum s5pv210_dmc_port ch, unsigned long freq) /* Find current DRAM frequency */ tmp = s5pv210_dram_conf[ch].freq; - do_div(tmp, freq); + tmp /= freq; tmp1 = s5pv210_dram_conf[ch].refresh; - do_div(tmp1, tmp); + tmp1 /= tmp; __raw_writel(tmp1, reg); } diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c index 8b923b7e9389..01b50cb4c982 100644 --- a/drivers/crypto/ccp/ccp-platform.c +++ b/drivers/crypto/ccp/ccp-platform.c @@ -94,6 +94,7 @@ static int ccp_platform_probe(struct platform_device *pdev) struct ccp_device *ccp; struct ccp_platform *ccp_platform; struct device *dev = &pdev->dev; + enum dev_dma_attr attr; struct resource *ior; int ret; @@ -118,18 +119,24 @@ static int ccp_platform_probe(struct platform_device *pdev) } ccp->io_regs = ccp->io_map; - ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); - if (ret) { - dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); goto e_err; } - ccp_platform->coherent = device_dma_is_coherent(ccp->dev); + ccp_platform->coherent = (attr == DEV_DMA_COHERENT); if (ccp_platform->coherent) ccp->axcache = CACHE_WB_NO_ALLOC; else ccp->axcache = CACHE_NONE; + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + goto e_err; + } + dev_set_drvdata(dev, ccp); ret = ccp_init(ccp); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 7dd893331785..618d952c2984 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -342,6 +342,7 @@ static int xgbe_probe(struct platform_device *pdev) struct resource *res; const char *phy_mode; unsigned int i, phy_memnum, phy_irqnum; + enum dev_dma_attr attr; int ret; DBGPR("--> xgbe_probe\n"); @@ -609,7 +610,12 @@ static int xgbe_probe(struct platform_device *pdev) goto err_io; /* Set the DMA coherency values */ - pdata->coherent = device_dma_is_coherent(pdata->dev); + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); + goto err_io; + } + pdata->coherent = (attr == DEV_DMA_COHERENT); if (pdata->coherent) { pdata->axdomain = XGBE_DMA_OS_AXDOMAIN; pdata->arcache = XGBE_DMA_OS_ARCACHE; diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c index ff27177f49ed..b1449f71601c 100644 --- a/drivers/of/of_pci.c +++ b/drivers/of/of_pci.c @@ -143,26 +143,6 @@ void of_pci_check_probe_only(void) } EXPORT_SYMBOL_GPL(of_pci_check_probe_only); -/** - * of_pci_dma_configure - Setup DMA configuration - * @dev: ptr to pci_dev struct of the PCI device - * - * Function to update PCI devices's DMA configuration using the same - * info from the OF node of host bridge's parent (if any). - */ -void of_pci_dma_configure(struct pci_dev *pci_dev) -{ - struct device *dev = &pci_dev->dev; - struct device *bridge = pci_get_host_bridge_device(pci_dev); - - if (!bridge->parent) - return; - - of_dma_configure(dev, bridge->parent->of_node); - pci_put_host_bridge_device(bridge); -} -EXPORT_SYMBOL_GPL(of_pci_dma_configure); - #if defined(CONFIG_OF_ADDRESS) /** * of_pci_get_host_bridge_resources - Parse PCI host bridge resources from DT diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f53b8e85f137..e735c728e3b3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -6,6 +6,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/pci.h> +#include <linux/of_device.h> #include <linux/of_pci.h> #include <linux/pci_hotplug.h> #include <linux/slab.h> @@ -13,6 +14,7 @@ #include <linux/cpumask.h> #include <linux/pci-aspm.h> #include <linux/aer.h> +#include <linux/acpi.h> #include <asm-generic/pci-bridge.h> #include "pci.h" @@ -1672,6 +1674,34 @@ static void pci_set_msi_domain(struct pci_dev *dev) dev_set_msi_domain(&dev->dev, d); } +/** + * pci_dma_configure - Setup DMA configuration + * @dev: ptr to pci_dev struct of the PCI device + * + * Function to update PCI devices's DMA configuration using the same + * info from the OF node or ACPI node of host bridge's parent (if any). + */ +static void pci_dma_configure(struct pci_dev *dev) +{ + struct device *bridge = pci_get_host_bridge_device(dev); + + if (IS_ENABLED(CONFIG_OF) && dev->dev.of_node) { + if (bridge->parent) + of_dma_configure(&dev->dev, bridge->parent->of_node); + } else if (has_acpi_companion(bridge)) { + struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); + enum dev_dma_attr attr = acpi_get_dma_attr(adev); + + if (attr == DEV_DMA_NOT_SUPPORTED) + dev_warn(&dev->dev, "DMA not supported.\n"); + else + arch_setup_dma_ops(&dev->dev, 0, 0, NULL, + attr == DEV_DMA_COHERENT); + } + + pci_put_host_bridge_device(bridge); +} + void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) { int ret; @@ -1685,7 +1715,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.dma_mask = &dev->dma_mask; dev->dev.dma_parms = &dev->dma_parms; dev->dev.coherent_dma_mask = 0xffffffffull; - of_pci_dma_configure(dev); + pci_dma_configure(dev); pci_set_dma_max_seg_size(dev, 65536); pci_set_dma_seg_boundary(dev, 0xffffffff); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index d11eff8a4efe..ad0a5ff3d4cd 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -390,39 +390,6 @@ struct acpi_data_node { struct completion kobj_done; }; -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) -{ - bool ret = false; - - if (!adev) - return ret; - - /** - * Currently, we only support _CCA=1 (i.e. coherent_dma=1) - * This should be equivalent to specifyig dma-coherent for - * a device in OF. - * - * For the case when _CCA=0 (i.e. coherent_dma=0 && cca_seen=1), - * There are two cases: - * case 1. Do not support and disable DMA. - * case 2. Support but rely on arch-specific cache maintenance for - * non-coherence DMA operations. - * Currently, we implement case 1 above. - * - * For the case when _CCA is missing (i.e. cca_seen=0) and - * platform specifies ACPI_CCA_REQUIRED, we do not support DMA, - * and fallback to arch-specific default handling. - * - * See acpi_init_coherency() for more info. - */ - if (adev->flags.coherent_dma) { - ret = true; - if (coherent) - *coherent = adev->flags.coherent_dma; - } - return ret; -} - static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return fwnode && (fwnode->type == FWNODE_ACPI @@ -595,6 +562,9 @@ struct acpi_pci_root { /* helper */ +bool acpi_dma_supported(struct acpi_device *adev); +enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); + struct acpi_device *acpi_find_child_device(struct acpi_device *parent, u64 address, bool check_children); int acpi_is_root_bridge(acpi_handle); diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 72d8803832ff..1bfa602958f2 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -163,9 +163,10 @@ static inline __must_check long __copy_to_user(void __user *to, #define put_user(x, ptr) \ ({ \ + void *__p = (ptr); \ might_fault(); \ - access_ok(VERIFY_WRITE, ptr, sizeof(*ptr)) ? \ - __put_user(x, ptr) : \ + access_ok(VERIFY_WRITE, __p, sizeof(*ptr)) ? \ + __put_user((x), ((__typeof__(*(ptr)) *)__p)) : \ -EFAULT; \ }) @@ -225,9 +226,10 @@ extern int __put_user_bad(void) __attribute__((noreturn)); #define get_user(x, ptr) \ ({ \ + const void *__p = (ptr); \ might_fault(); \ - access_ok(VERIFY_READ, ptr, sizeof(*ptr)) ? \ - __get_user(x, ptr) : \ + access_ok(VERIFY_READ, __p, sizeof(*ptr)) ? \ + __get_user((x), (__typeof__(*(ptr)) *)__p) : \ -EFAULT; \ }) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ebfac2fe0c81..054833939995 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -601,11 +601,16 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) +static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; } +static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + return DEV_DMA_NOT_SUPPORTED; +} + #define ACPI_PTR(_ptr) (NULL) #endif /* !CONFIG_ACPI */ diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 008fc67d0d96..68b575afe5f5 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,6 +10,10 @@ #ifdef CONFIG_CONTEXT_TRACKING extern void context_tracking_cpu_set(int cpu); +/* Called with interrupts disabled. */ +extern void __context_tracking_enter(enum ctx_state state); +extern void __context_tracking_exit(enum ctx_state state); + extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); @@ -18,13 +22,13 @@ extern void context_tracking_user_exit(void); static inline void user_enter(void) { if (context_tracking_is_enabled()) - context_tracking_user_enter(); + context_tracking_enter(CONTEXT_USER); } static inline void user_exit(void) { if (context_tracking_is_enabled()) - context_tracking_user_exit(); + context_tracking_exit(CONTEXT_USER); } static inline enum ctx_state exception_enter(void) @@ -88,13 +92,13 @@ static inline void guest_enter(void) current->flags |= PF_VCPU; if (context_tracking_is_enabled()) - context_tracking_enter(CONTEXT_GUEST); + __context_tracking_enter(CONTEXT_GUEST); } static inline void guest_exit(void) { if (context_tracking_is_enabled()) - context_tracking_exit(CONTEXT_GUEST); + __context_tracking_exit(CONTEXT_GUEST); if (vtime_accounting_enabled()) vtime_guest_exit(current); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 242a6d2b53ff..5706a2108f0a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1183,4 +1183,5 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ + #endif diff --git a/include/linux/math64.h b/include/linux/math64.h index c45c089bfdac..6e8b5b270ffe 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -142,6 +142,13 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift) +{ + return (u64)(((unsigned __int128)a * mul) >> shift); +} +#endif /* mul_u64_u64_shr */ + #else #ifndef mul_u64_u32_shr @@ -161,6 +168,79 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } rl, rm, rn, rh, a0, b0; + u64 c; + + a0.ll = a; + b0.ll = b; + + rl.ll = (u64)a0.l.low * b0.l.low; + rm.ll = (u64)a0.l.low * b0.l.high; + rn.ll = (u64)a0.l.high * b0.l.low; + rh.ll = (u64)a0.l.high * b0.l.high; + + /* + * Each of these lines computes a 64-bit intermediate result into "c", + * starting at bits 32-95. The low 32-bits go into the result of the + * multiplication, the high 32-bits are carried into the next step. + */ + rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low; + rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low; + rh.l.high = (c >> 32) + rh.l.high; + + /* + * The 128-bit result of the multiplication is in rl.ll and rh.ll, + * shift it right and throw away the high part of the result. + */ + if (shift == 0) + return rl.ll; + if (shift < 64) + return (rl.ll >> shift) | (rh.ll << (64 - shift)); + return rh.ll >> (shift & 63); +} +#endif /* mul_u64_u64_shr */ + #endif +#ifndef mul_u64_u32_div +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } u, rl, rh; + + u.ll = a; + rl.ll = (u64)u.l.low * mul; + rh.ll = (u64)u.l.high * mul + rl.l.high; + + /* Bits 32-63 of the result will be in rh.l.low. */ + rl.l.high = do_div(rh.ll, divisor); + + /* Bits 0-31 of the result will be in rl.l.low. */ + do_div(rl.ll, divisor); + + rl.l.high = rh.l.low; + return rl.ll; +} +#endif /* mul_u64_u32_div */ + #endif /* _LINUX_MATH64_H */ diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 38c0533a3359..2c51ee78b1c0 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -16,7 +16,6 @@ int of_pci_get_devfn(struct device_node *np); int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); -void of_pci_dma_configure(struct pci_dev *pci_dev); void of_pci_check_probe_only(void); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) @@ -53,8 +52,6 @@ of_get_pci_domain_nr(struct device_node *node) return -1; } -static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { } - static inline void of_pci_check_probe_only(void) { } #endif diff --git a/include/linux/property.h b/include/linux/property.h index 463de52fe891..0a3705a7c9f2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -27,6 +27,12 @@ enum dev_prop_type { DEV_PROP_MAX, }; +enum dev_dma_attr { + DEV_DMA_NOT_SUPPORTED, + DEV_DMA_NON_COHERENT, + DEV_DMA_COHERENT, +}; + bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval); @@ -168,7 +174,9 @@ struct property_set { void device_add_property_set(struct device *dev, struct property_set *pset); -bool device_dma_is_coherent(struct device *dev); +bool device_dma_supported(struct device *dev); + +enum dev_dma_attr device_get_dma_attr(struct device *dev); int device_get_phy_mode(struct device *dev); diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 0a495ab35bc7..d8560ee3bab7 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -58,36 +58,13 @@ static void context_tracking_recursion_exit(void) * instructions to execute won't use any RCU read side critical section * because this function sets RCU in extended quiescent state. */ -void context_tracking_enter(enum ctx_state state) +void __context_tracking_enter(enum ctx_state state) { - unsigned long flags; - - /* - * Repeat the user_enter() check here because some archs may be calling - * this from asm and if no CPU needs context tracking, they shouldn't - * go further. Repeat the check here until they support the inline static - * key check. - */ - if (!context_tracking_is_enabled()) - return; - - /* - * Some contexts may involve an exception occuring in an irq, - * leading to that nesting: - * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() - * This would mess up the dyntick_nesting count though. And rcu_irq_*() - * helpers are enough to protect RCU uses inside the exception. So - * just return immediately if we detect we are in an IRQ. - */ - if (in_interrupt()) - return; - /* Kernel threads aren't supposed to go to userspace */ WARN_ON_ONCE(!current->mm); - local_irq_save(flags); if (!context_tracking_recursion_enter()) - goto out_irq_restore; + return; if ( __this_cpu_read(context_tracking.state) != state) { if (__this_cpu_read(context_tracking.active)) { @@ -120,7 +97,27 @@ void context_tracking_enter(enum ctx_state state) __this_cpu_write(context_tracking.state, state); } context_tracking_recursion_exit(); -out_irq_restore: +} +NOKPROBE_SYMBOL(__context_tracking_enter); +EXPORT_SYMBOL_GPL(__context_tracking_enter); + +void context_tracking_enter(enum ctx_state state) +{ + unsigned long flags; + + /* + * Some contexts may involve an exception occuring in an irq, + * leading to that nesting: + * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() + * This would mess up the dyntick_nesting count though. And rcu_irq_*() + * helpers are enough to protect RCU uses inside the exception. So + * just return immediately if we detect we are in an IRQ. + */ + if (in_interrupt()) + return; + + local_irq_save(flags); + __context_tracking_enter(state); local_irq_restore(flags); } NOKPROBE_SYMBOL(context_tracking_enter); @@ -128,7 +125,7 @@ EXPORT_SYMBOL_GPL(context_tracking_enter); void context_tracking_user_enter(void) { - context_tracking_enter(CONTEXT_USER); + user_enter(); } NOKPROBE_SYMBOL(context_tracking_user_enter); @@ -144,19 +141,10 @@ NOKPROBE_SYMBOL(context_tracking_user_enter); * This call supports re-entrancy. This way it can be called from any exception * handler without needing to know if we came from userspace or not. */ -void context_tracking_exit(enum ctx_state state) +void __context_tracking_exit(enum ctx_state state) { - unsigned long flags; - - if (!context_tracking_is_enabled()) - return; - - if (in_interrupt()) - return; - - local_irq_save(flags); if (!context_tracking_recursion_enter()) - goto out_irq_restore; + return; if (__this_cpu_read(context_tracking.state) == state) { if (__this_cpu_read(context_tracking.active)) { @@ -173,7 +161,19 @@ void context_tracking_exit(enum ctx_state state) __this_cpu_write(context_tracking.state, CONTEXT_KERNEL); } context_tracking_recursion_exit(); -out_irq_restore: +} +NOKPROBE_SYMBOL(__context_tracking_exit); +EXPORT_SYMBOL_GPL(__context_tracking_exit); + +void context_tracking_exit(enum ctx_state state) +{ + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + __context_tracking_exit(state); local_irq_restore(flags); } NOKPROBE_SYMBOL(context_tracking_exit); @@ -181,7 +181,7 @@ EXPORT_SYMBOL_GPL(context_tracking_exit); void context_tracking_user_exit(void) { - context_tracking_exit(CONTEXT_USER); + user_exit(); } NOKPROBE_SYMBOL(context_tracking_user_exit); diff --git a/tools/power/cpupower/debug/i386/dump_psb.c b/tools/power/cpupower/debug/i386/dump_psb.c index 8d6a47514253..2c768cf70128 100644 --- a/tools/power/cpupower/debug/i386/dump_psb.c +++ b/tools/power/cpupower/debug/i386/dump_psb.c @@ -134,7 +134,7 @@ next_one: } static struct option info_opts[] = { - {.name = "numpst", .has_arg=no_argument, .flag=NULL, .val='n'}, + {"numpst", no_argument, NULL, 'n'}, }; void print_help(void) diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1 index 3e6799d7a79f..580c4e3ea92a 100644 --- a/tools/power/cpupower/man/cpupower-idle-set.1 +++ b/tools/power/cpupower/man/cpupower-idle-set.1 @@ -20,7 +20,9 @@ Disable a specific processor sleep state. Enable a specific processor sleep state. .TP \fB\-D\fR \fB\-\-disable-by-latency\fR <LATENCY> -Disable all idle states with a equal or higher latency than <LATENCY> +Disable all idle states with a equal or higher latency than <LATENCY>. + +Enable all idle states with a latency lower than <LATENCY>. .TP \fB\-E\fR \fB\-\-enable-all\fR Enable all idle states if not enabled already. diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index b4b90a97662c..0e6764330241 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -536,21 +536,21 @@ static int get_latency(unsigned int cpu, unsigned int human) } static struct option info_opts[] = { - { .name = "debug", .has_arg = no_argument, .flag = NULL, .val = 'e'}, - { .name = "boost", .has_arg = no_argument, .flag = NULL, .val = 'b'}, - { .name = "freq", .has_arg = no_argument, .flag = NULL, .val = 'f'}, - { .name = "hwfreq", .has_arg = no_argument, .flag = NULL, .val = 'w'}, - { .name = "hwlimits", .has_arg = no_argument, .flag = NULL, .val = 'l'}, - { .name = "driver", .has_arg = no_argument, .flag = NULL, .val = 'd'}, - { .name = "policy", .has_arg = no_argument, .flag = NULL, .val = 'p'}, - { .name = "governors", .has_arg = no_argument, .flag = NULL, .val = 'g'}, - { .name = "related-cpus", .has_arg = no_argument, .flag = NULL, .val = 'r'}, - { .name = "affected-cpus",.has_arg = no_argument, .flag = NULL, .val = 'a'}, - { .name = "stats", .has_arg = no_argument, .flag = NULL, .val = 's'}, - { .name = "latency", .has_arg = no_argument, .flag = NULL, .val = 'y'}, - { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, - { .name = "human", .has_arg = no_argument, .flag = NULL, .val = 'm'}, - { .name = "no-rounding", .has_arg = no_argument, .flag = NULL, .val = 'n'}, + {"debug", no_argument, NULL, 'e'}, + {"boost", no_argument, NULL, 'b'}, + {"freq", no_argument, NULL, 'f'}, + {"hwfreq", no_argument, NULL, 'w'}, + {"hwlimits", no_argument, NULL, 'l'}, + {"driver", no_argument, NULL, 'd'}, + {"policy", no_argument, NULL, 'p'}, + {"governors", no_argument, NULL, 'g'}, + {"related-cpus", no_argument, NULL, 'r'}, + {"affected-cpus", no_argument, NULL, 'a'}, + {"stats", no_argument, NULL, 's'}, + {"latency", no_argument, NULL, 'y'}, + {"proc", no_argument, NULL, 'o'}, + {"human", no_argument, NULL, 'm'}, + {"no-rounding", no_argument, NULL, 'n'}, { }, }; diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 4e213576381e..0fbd1a22c0a9 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -22,11 +22,11 @@ #define NORM_FREQ_LEN 32 static struct option set_opts[] = { - { .name = "min", .has_arg = required_argument, .flag = NULL, .val = 'd'}, - { .name = "max", .has_arg = required_argument, .flag = NULL, .val = 'u'}, - { .name = "governor", .has_arg = required_argument, .flag = NULL, .val = 'g'}, - { .name = "freq", .has_arg = required_argument, .flag = NULL, .val = 'f'}, - { .name = "related", .has_arg = no_argument, .flag = NULL, .val='r'}, + {"min", required_argument, NULL, 'd'}, + {"max", required_argument, NULL, 'u'}, + {"governor", required_argument, NULL, 'g'}, + {"freq", required_argument, NULL, 'f'}, + {"related", no_argument, NULL, 'r'}, { }, }; diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 75e66de7e7a7..750c1d82c3f7 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -126,8 +126,8 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) } static struct option info_opts[] = { - { .name = "silent", .has_arg = no_argument, .flag = NULL, .val = 's'}, - { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, + {"silent", no_argument, NULL, 's'}, + {"proc", no_argument, NULL, 'o'}, { }, }; diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c index d45d8d775c02..d6b6ae44b8c2 100644 --- a/tools/power/cpupower/utils/cpuidle-set.c +++ b/tools/power/cpupower/utils/cpuidle-set.c @@ -13,15 +13,11 @@ #include "helpers/sysfs.h" static struct option info_opts[] = { - { .name = "disable", - .has_arg = required_argument, .flag = NULL, .val = 'd'}, - { .name = "enable", - .has_arg = required_argument, .flag = NULL, .val = 'e'}, - { .name = "disable-by-latency", - .has_arg = required_argument, .flag = NULL, .val = 'D'}, - { .name = "enable-all", - .has_arg = no_argument, .flag = NULL, .val = 'E'}, - { }, + {"disable", required_argument, NULL, 'd'}, + {"enable", required_argument, NULL, 'e'}, + {"disable-by-latency", required_argument, NULL, 'D'}, + {"enable-all", no_argument, NULL, 'E'}, + { }, }; @@ -148,14 +144,21 @@ int cmd_idle_set(int argc, char **argv) (cpu, idlestate); state_latency = sysfs_get_idlestate_latency (cpu, idlestate); - printf("CPU: %u - idlestate %u - state_latency: %llu - latency: %llu\n", - cpu, idlestate, state_latency, latency); - if (disabled == 1 || latency > state_latency) + if (disabled == 1) { + if (latency > state_latency){ + ret = sysfs_idlestate_disable + (cpu, idlestate, 0); + if (ret == 0) + printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu); + } continue; - ret = sysfs_idlestate_disable - (cpu, idlestate, 1); - if (ret == 0) + } + if (latency <= state_latency){ + ret = sysfs_idlestate_disable + (cpu, idlestate, 1); + if (ret == 0) printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu); + } } break; case 'E': diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 136d979e9586..10299f2e9d2a 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -17,8 +17,8 @@ #include "helpers/sysfs.h" static struct option set_opts[] = { - { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'}, - { }, + {"perf-bias", optional_argument, NULL, 'b'}, + { }, }; static void print_wrong_arg_exit(void) diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 573c75f8e3f5..3e6f374f8dd7 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -18,7 +18,7 @@ #include "helpers/bitmask.h" static struct option set_opts[] = { - { .name = "perf-bias", .has_arg = required_argument, .flag = NULL, .val = 'b'}, + {"perf-bias", required_argument, NULL, 'b'}, { }, }; diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c index cea398c176e7..9cbb7fd75171 100644 --- a/tools/power/cpupower/utils/helpers/topology.c +++ b/tools/power/cpupower/utils/helpers/topology.c @@ -73,18 +73,22 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) for (cpu = 0; cpu < cpus; cpu++) { cpu_top->core_info[cpu].cpu = cpu; cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); - if (!cpu_top->core_info[cpu].is_online) - continue; if(sysfs_topology_read_file( cpu, "physical_package_id", - &(cpu_top->core_info[cpu].pkg)) < 0) - return -1; + &(cpu_top->core_info[cpu].pkg)) < 0) { + cpu_top->core_info[cpu].pkg = -1; + cpu_top->core_info[cpu].core = -1; + continue; + } if(sysfs_topology_read_file( cpu, "core_id", - &(cpu_top->core_info[cpu].core)) < 0) - return -1; + &(cpu_top->core_info[cpu].core)) < 0) { + cpu_top->core_info[cpu].pkg = -1; + cpu_top->core_info[cpu].core = -1; + continue; + } } qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), @@ -95,12 +99,15 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) done by pkg value. */ last_pkg = cpu_top->core_info[0].pkg; for(cpu = 1; cpu < cpus; cpu++) { - if(cpu_top->core_info[cpu].pkg != last_pkg) { + if (cpu_top->core_info[cpu].pkg != last_pkg && + cpu_top->core_info[cpu].pkg != -1) { + last_pkg = cpu_top->core_info[cpu].pkg; cpu_top->pkgs++; } } - cpu_top->pkgs++; + if (!cpu_top->core_info[0].pkg == -1) + cpu_top->pkgs++; /* Intel's cores count is not consecutively numbered, there may * be a core_id of 3, but none of 2. Assume there always is 0 diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index c4bae9203a69..05f953f0f0a0 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -143,6 +143,9 @@ void print_results(int topology_depth, int cpu) /* Be careful CPUs may got resorted for pkg value do not just use cpu */ if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu)) return; + if (!cpu_top.core_info[cpu].is_online && + cpu_top.core_info[cpu].pkg == -1) + return; if (topology_depth > 2) printf("%4d|", cpu_top.core_info[cpu].pkg); @@ -191,7 +194,8 @@ void print_results(int topology_depth, int cpu) * It's up to the monitor plug-in to check .is_online, this one * is just for additional info. */ - if (!cpu_top.core_info[cpu].is_online) { + if (!cpu_top.core_info[cpu].is_online && + cpu_top.core_info[cpu].pkg != -1) { printf(_(" *is offline\n")); return; } else @@ -388,6 +392,9 @@ int cmd_monitor(int argc, char **argv) return EXIT_FAILURE; } + if (!cpu_top.core_info[0].is_online) + printf("WARNING: at least one cpu is offline\n"); + /* Default is: monitor all CPUs */ if (bitmask_isallclear(cpus_chosen)) bitmask_setall(cpus_chosen); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index bde0ef1a63df..d8e4b20b6d54 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -75,6 +75,7 @@ unsigned int aperf_mperf_multiplier = 1; int do_smi; double bclk; double base_hz; +unsigned int has_base_hz; double tsc_tweak = 1.0; unsigned int show_pkg; unsigned int show_core; @@ -96,6 +97,7 @@ unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; +double discover_bclk(unsigned int family, unsigned int model); #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -511,9 +513,13 @@ int format_counters(struct thread_data *t, struct core_data *c, } /* Bzy_MHz */ - if (has_aperf) - outp += sprintf(outp, "%8.0f", - 1.0 * t->tsc * tsc_tweak / units * t->aperf / t->mperf / interval_float); + if (has_aperf) { + if (has_base_hz) + outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf); + else + outp += sprintf(outp, "%8.0f", + 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + } /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); @@ -1158,12 +1164,6 @@ int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, static void calculate_tsc_tweak() { - unsigned long long msr; - unsigned int base_ratio; - - get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); - base_ratio = (msr >> 8) & 0xFF; - base_hz = base_ratio * bclk * 1000000; tsc_tweak = base_hz / tsc_hz; } @@ -1440,7 +1440,7 @@ dump_config_tdp(void) get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); - fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xEF); + fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); fprintf(stderr, ")\n"); } @@ -1821,6 +1821,7 @@ void check_permissions() int probe_nhm_msrs(unsigned int family, unsigned int model) { unsigned long long msr; + unsigned int base_ratio; int *pkg_cstate_limits; if (!genuine_intel) @@ -1829,6 +1830,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) if (family != 6) return 0; + bclk = discover_bclk(family, model); + switch (model) { case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ @@ -1871,9 +1874,13 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) return 0; } get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); - pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); + base_ratio = (msr >> 8) & 0xFF; + + base_hz = base_ratio * bclk * 1000000; + has_base_hz = 1; return 1; } int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) @@ -2780,7 +2787,6 @@ void process_cpuid() do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - bclk = discover_bclk(family, model); rapl_probe(family, model); perf_limit_reasons_probe(family, model); |