Diffstat (limited to 'arch'): 158 files changed, 2007 insertions, 1358 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5085a1eab9fc..054ead960f98 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1310,7 +1310,7 @@ config SCHED_SMT config HAVE_ARM_SCU bool help - This option enables support for the ARM system coherency unit + This option enables support for the ARM snoop control unit config HAVE_ARM_ARCH_TIMER bool "Architected timer support" @@ -1322,7 +1322,6 @@ config HAVE_ARM_ARCH_TIMER config HAVE_ARM_TWD bool - select TIMER_OF if OF help This options enables support for the ARM timer and watchdog unit diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu index 1168a03c8525..36c80d3dd93f 100644 --- a/arch/arm/Kconfig-nommu +++ b/arch/arm/Kconfig-nommu @@ -20,10 +20,12 @@ config DRAM_SIZE config FLASH_MEM_BASE hex 'FLASH Base Address' if SET_MEM_PARAM + depends on CPU_ARM740T || CPU_ARM946E || CPU_ARM940T default 0x00400000 config FLASH_SIZE hex 'FLASH Size' if SET_MEM_PARAM + depends on CPU_ARM740T || CPU_ARM946E || CPU_ARM940T default 0x00400000 config PROCESSOR_ID diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 00000e91ad65..807a7d06c2a0 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=-p --no-undefined -X --pic-veneer +LDFLAGS_vmlinux := --no-undefined -X --pic-veneer ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 KBUILD_LDFLAGS_MODULE += --be8 diff --git a/arch/arm/boot/bootp/Makefile b/arch/arm/boot/bootp/Makefile index 83e1a076a5d6..981a8d03f064 100644 --- a/arch/arm/boot/bootp/Makefile +++ b/arch/arm/boot/bootp/Makefile @@ -8,7 +8,7 @@ GCOV_PROFILE := n -LDFLAGS_bootp :=-p --no-undefined -X \ +LDFLAGS_bootp := --no-undefined -X \ --defsym initrd_phys=$(INITRD_PHYS) \ --defsym params_phys=$(PARAMS_PHYS) -T AFLAGS_initrd.o :=-DINITRD=\"$(INITRD)\" diff --git a/arch/arm/boot/bootp/init.S b/arch/arm/boot/bootp/init.S index 78b508075161..142927e5f485 100644 --- a/arch/arm/boot/bootp/init.S +++ b/arch/arm/boot/bootp/init.S @@ -44,7 +44,7 @@ _start: add lr, pc, #-0x8 @ lr = current load addr */ movne r10, #0 @ terminator movne r4, #2 @ Size of this entry (2 words) - stmneia r9, {r4, r5, r10} @ Size, ATAG_CORE, terminator + stmiane r9, {r4, r5, r10} @ Size, ATAG_CORE, terminator /* * find the end of the tag list, and then add an INITRD tag on the end. diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 6114ae6ea466..9219389bbe61 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -132,8 +132,6 @@ endif ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 endif -# ? -LDFLAGS_vmlinux += -p # Report unresolved symbol references LDFLAGS_vmlinux += --no-undefined # Delete all temporary local symbols diff --git a/arch/arm/boot/compressed/ll_char_wr.S b/arch/arm/boot/compressed/ll_char_wr.S index 8517c8606b4a..b1dcdb9f4030 100644 --- a/arch/arm/boot/compressed/ll_char_wr.S +++ b/arch/arm/boot/compressed/ll_char_wr.S @@ -75,7 +75,7 @@ Lrow4bpplp: tst r1, #7 @ avoid using r7 directly after str r7, [r0, -r5]! 
subne r1, r1, #1 - ldrneb r7, [r6, r1] + ldrbne r7, [r6, r1] bne Lrow4bpplp ldmfd sp!, {r4 - r7, pc} @@ -103,7 +103,7 @@ Lrow8bpplp: sub r0, r0, r5 @ avoid ip stmia r0, {r4, ip} subne r1, r1, #1 - ldrneb r7, [r6, r1] + ldrbne r7, [r6, r1] bne Lrow8bpplp ldmfd sp!, {r4 - r7, pc} diff --git a/arch/arm/boot/dts/imx28-cfa10036.dts b/arch/arm/boot/dts/imx28-cfa10036.dts index d3e3622979c5..de48b5808ef6 100644 --- a/arch/arm/boot/dts/imx28-cfa10036.dts +++ b/arch/arm/boot/dts/imx28-cfa10036.dts @@ -11,6 +11,7 @@ /dts-v1/; #include "imx28.dtsi" +#include <dt-bindings/gpio/gpio.h> / { model = "Crystalfontz CFA-10036 Board"; @@ -96,7 +97,7 @@ pinctrl-names = "default"; pinctrl-0 = <&ssd1306_cfa10036>; reg = <0x3c>; - reset-gpios = <&gpio2 7 0>; + reset-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>; solomon,height = <32>; solomon,width = <128>; solomon,page-offset = <0>; diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index ad574d20415c..1b1b82b37ce0 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -381,7 +381,7 @@ static int __init nocache_trampoline(unsigned long _arg) unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); phys_reset_t phys_reset; - mcpm_set_entry_vector(cpu, cluster, cpu_resume); + mcpm_set_entry_vector(cpu, cluster, cpu_resume_no_hyp); setup_mm_for_reboot(); __mcpm_cpu_going_down(cpu, cluster); diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index f6f485f4744e..d15b8c99f1b3 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -55,7 +55,7 @@ #define ICH_VTR __ACCESS_CP15(c12, 4, c11, 1) #define ICH_MISR __ACCESS_CP15(c12, 4, c11, 2) #define ICH_EISR __ACCESS_CP15(c12, 4, c11, 3) -#define ICH_ELSR __ACCESS_CP15(c12, 4, c11, 5) +#define ICH_ELRSR __ACCESS_CP15(c12, 4, c11, 5) #define ICH_VMCR __ACCESS_CP15(c12, 4, c11, 7) #define __LR0(x) __ACCESS_CP15(c12, 4, c12, x) @@ -152,7 +152,7 @@ CPUIF_MAP(ICH_HCR, ICH_HCR_EL2) CPUIF_MAP(ICH_VTR, ICH_VTR_EL2) CPUIF_MAP(ICH_MISR, ICH_MISR_EL2) CPUIF_MAP(ICH_EISR, ICH_EISR_EL2) -CPUIF_MAP(ICH_ELSR, ICH_ELSR_EL2) +CPUIF_MAP(ICH_ELRSR, ICH_ELRSR_EL2) CPUIF_MAP(ICH_VMCR, ICH_VMCR_EL2) CPUIF_MAP(ICH_AP0R3, ICH_AP0R3_EL2) CPUIF_MAP(ICH_AP0R2, ICH_AP0R2_EL2) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 28a48e0d4cca..b59921a560da 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -376,9 +376,9 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .macro usraccoff, instr, reg, ptr, inc, off, cond, abort, t=TUSER() 9999: .if \inc == 1 - \instr\cond\()b\()\t\().w \reg, [\ptr, #\off] + \instr\()b\t\cond\().w \reg, [\ptr, #\off] .elseif \inc == 4 - \instr\cond\()\t\().w \reg, [\ptr, #\off] + \instr\t\cond\().w \reg, [\ptr, #\off] .else .error "Unsupported inc macro argument" .endif @@ -417,9 +417,9 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .rept \rept 9999: .if \inc == 1 - \instr\cond\()b\()\t \reg, [\ptr], #\inc + \instr\()b\t\cond \reg, [\ptr], #\inc .elseif \inc == 4 - \instr\cond\()\t \reg, [\ptr], #\inc + \instr\t\cond \reg, [\ptr], #\inc .else .error "Unsupported inc macro argument" .endif @@ -460,7 +460,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .macro check_uaccess, addr:req, size:req, limit:req, tmp:req, bad:req #ifndef CONFIG_CPU_USE_DOMAINS adds \tmp, \addr, #\size - 1 - sbcccs \tmp, \tmp, \limit + sbcscc \tmp, \tmp, \limit bcs \bad #ifdef CONFIG_CPU_SPECTRE movcs \addr, #0 @@ -474,7 +474,7 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) sub \tmp, \limit, #1 subs \tmp, 
\tmp, \addr @ tmp = limit - 1 - addr addhs \tmp, \tmp, #1 @ if (tmp >= 0) { - subhss \tmp, \tmp, \size @ tmp = limit - (addr + size) } + subshs \tmp, \tmp, \size @ tmp = limit - (addr + size) } movlo \addr, #0 @ if (tmp < 0) addr = NULL csdb #endif diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 69772e742a0a..83ae97c049d9 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -11,6 +11,8 @@ #define sev() __asm__ __volatile__ ("sev" : : : "memory") #define wfe() __asm__ __volatile__ ("wfe" : : : "memory") #define wfi() __asm__ __volatile__ ("wfi" : : : "memory") +#else +#define wfe() do { } while (0) #endif #if __LINUX_ARM_ARCH__ >= 7 diff --git a/arch/arm/include/asm/hardware/entry-macro-iomd.S b/arch/arm/include/asm/hardware/entry-macro-iomd.S index 8c215acd9b57..f7692731e514 100644 --- a/arch/arm/include/asm/hardware/entry-macro-iomd.S +++ b/arch/arm/include/asm/hardware/entry-macro-iomd.S @@ -16,25 +16,25 @@ ldr \tmp, =irq_prio_h teq \irqstat, #0 #ifdef IOMD_BASE - ldreqb \irqstat, [\base, #IOMD_DMAREQ] @ get dma + ldrbeq \irqstat, [\base, #IOMD_DMAREQ] @ get dma addeq \tmp, \tmp, #256 @ irq_prio_h table size teqeq \irqstat, #0 bne 2406f #endif - ldreqb \irqstat, [\base, #IOMD_IRQREQA] @ get low priority + ldrbeq \irqstat, [\base, #IOMD_IRQREQA] @ get low priority addeq \tmp, \tmp, #256 @ irq_prio_d table size teqeq \irqstat, #0 #ifdef IOMD_IRQREQC - ldreqb \irqstat, [\base, #IOMD_IRQREQC] + ldrbeq \irqstat, [\base, #IOMD_IRQREQC] addeq \tmp, \tmp, #256 @ irq_prio_l table size teqeq \irqstat, #0 #endif #ifdef IOMD_IRQREQD - ldreqb \irqstat, [\base, #IOMD_IRQREQD] + ldrbeq \irqstat, [\base, #IOMD_IRQREQD] addeq \tmp, \tmp, #256 @ irq_prio_lc table size teqeq \irqstat, #0 #endif -2406: ldrneb \irqnr, [\tmp, \irqstat] @ get IRQ number +2406: ldrbne \irqnr, [\tmp, \irqstat] @ get IRQ number .endm /* diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 77121b713bef..8927cae7c966 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -265,6 +265,14 @@ static inline bool kvm_vcpu_dabt_isextabt(struct kvm_vcpu *vcpu) } } +static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static inline u32 kvm_vcpu_hvc_get_imm(struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & HSR_HVC_IMM_MASK; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 50e89869178a..770d73257ad9 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -26,6 +26,7 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> #include <asm/fpstate.h> +#include <asm/smp_plat.h> #include <kvm/arm_arch_timer.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -57,10 +58,13 @@ int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_reset_coprocs(struct kvm_vcpu *vcpu); -struct kvm_arch { - /* VTTBR value associated with below pgd and vmid */ - u64 vttbr; +struct kvm_vmid { + /* The VMID generation used for the virt. memory system */ + u64 vmid_gen; + u32 vmid; +}; +struct kvm_arch { /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; @@ -70,11 +74,11 @@ struct kvm_arch { */ /* The VMID generation used for the virt. 
memory system */ - u64 vmid_gen; - u32 vmid; + struct kvm_vmid vmid; /* Stage-2 page table */ pgd_t *pgd; + phys_addr_t pgd_phys; /* Interrupt controller */ struct vgic_dist vgic; @@ -148,6 +152,13 @@ struct kvm_cpu_context { typedef struct kvm_cpu_context kvm_cpu_context_t; +static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt, + int cpu) +{ + /* The host's MPIDR is immutable, so let's set it up at boot time */ + cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu); +} + struct vcpu_reset_state { unsigned long pc; unsigned long r0; @@ -224,7 +235,35 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); -unsigned long kvm_call_hyp(void *hypfn, ...); + +unsigned long __kvm_call_hyp(void *hypfn, ...); + +/* + * The has_vhe() part doesn't get emitted, but is used for type-checking. + */ +#define kvm_call_hyp(f, ...) \ + do { \ + if (has_vhe()) { \ + f(__VA_ARGS__); \ + } else { \ + __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ + } \ + } while(0) + +#define kvm_call_hyp_ret(f, ...) \ + ({ \ + typeof(f(__VA_ARGS__)) ret; \ + \ + if (has_vhe()) { \ + ret = f(__VA_ARGS__); \ + } else { \ + ret = __kvm_call_hyp(kvm_ksym_ref(f), \ + ##__VA_ARGS__); \ + } \ + \ + ret; \ + }) + void force_vm_exit(const cpumask_t *mask); int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); @@ -275,7 +314,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, * compliant with the PCS!). */ - kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); + __kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } static inline void __cpu_init_stage2(void) diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index e93a0cac9add..87bcd18df8d5 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -40,6 +40,7 @@ #define TTBR1 __ACCESS_CP15_64(1, c2) #define VTTBR __ACCESS_CP15_64(6, c2) #define PAR __ACCESS_CP15_64(0, c7) +#define CNTP_CVAL __ACCESS_CP15_64(2, c14) #define CNTV_CVAL __ACCESS_CP15_64(3, c14) #define CNTVOFF __ACCESS_CP15_64(4, c14) @@ -85,6 +86,7 @@ #define TID_PRIV __ACCESS_CP15(c13, 0, c0, 4) #define HTPIDR __ACCESS_CP15(c13, 4, c0, 2) #define CNTKCTL __ACCESS_CP15(c14, 0, c1, 0) +#define CNTP_CTL __ACCESS_CP15(c14, 0, c2, 1) #define CNTV_CTL __ACCESS_CP15(c14, 0, c3, 1) #define CNTHCTL __ACCESS_CP15(c14, 4, c1, 0) @@ -94,6 +96,8 @@ #define read_sysreg_el0(r) read_sysreg(r##_el0) #define write_sysreg_el0(v, r) write_sysreg(v, r##_el0) +#define cntp_ctl_el0 CNTP_CTL +#define cntp_cval_el0 CNTP_CVAL #define cntv_ctl_el0 CNTV_CTL #define cntv_cval_el0 CNTV_CVAL #define cntvoff_el2 CNTVOFF diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 3a875fc1b63c..2de96a180166 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -421,9 +421,14 @@ static inline int hyp_map_aux_data(void) static inline void kvm_set_ipa_limit(void) {} -static inline bool kvm_cpu_has_cnp(void) +static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) { - return false; + struct kvm_vmid *vmid = &kvm->arch.vmid; + u64 vmid_field, baddr; + + baddr = kvm->arch.pgd_phys; + vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; + return kvm_phys_to_vttbr(baddr) | vmid_field; } #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/include/asm/pgtable.h 
b/arch/arm/include/asm/pgtable.h index a757401129f9..48ce1b19069b 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -125,6 +125,9 @@ extern pgprot_t pgprot_s2_device; #define pgprot_stronglyordered(prot) \ __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_UNCACHED) +#define pgprot_device(prot) \ + __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_DEV_SHARED | L_PTE_SHARED | L_PTE_DIRTY | L_PTE_XN) + #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE #define pgprot_dmacoherent(prot) \ __pgprot_modify(prot, L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE | L_PTE_XN) diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 120f4c9bbfde..57fe73ea0f72 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -89,7 +89,11 @@ extern void release_thread(struct task_struct *); unsigned long get_wchan(struct task_struct *p); #if __LINUX_ARM_ARCH__ == 6 || defined(CONFIG_ARM_ERRATA_754327) -#define cpu_relax() smp_mb() +#define cpu_relax() \ + do { \ + smp_mb(); \ + __asm__ __volatile__("nop; nop; nop; nop; nop; nop; nop; nop; nop; nop;"); \ + } while (0) #else #define cpu_relax() barrier() #endif diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 709a55989cb0..451ae684aaf4 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -67,7 +67,6 @@ struct secondary_data { void *stack; }; extern struct secondary_data secondary_data; -extern volatile int pen_release; extern void secondary_startup(void); extern void secondary_startup_arm(void); diff --git a/arch/arm/include/asm/smp_twd.h b/arch/arm/include/asm/smp_twd.h index 312784ee9936..c729d2113a24 100644 --- a/arch/arm/include/asm/smp_twd.h +++ b/arch/arm/include/asm/smp_twd.h @@ -19,20 +19,4 @@ #define TWD_TIMER_CONTROL_PERIODIC (1 << 1) #define TWD_TIMER_CONTROL_IT_ENABLE (1 << 2) -#include <linux/ioport.h> - -struct twd_local_timer { - struct resource res[2]; -}; - -#define DEFINE_TWD_LOCAL_TIMER(name,base,irq) \ -struct twd_local_timer name __initdata = { \ - .res = { \ - DEFINE_RES_MEM(base, 0x10), \ - DEFINE_RES_IRQ(irq), \ - }, \ -}; - -int twd_local_timer_register(struct twd_local_timer *); - #endif diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 099c78fcf62d..8f009e788ad4 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -210,11 +210,12 @@ static inline void arch_read_lock(arch_rwlock_t *rw) prefetchw(&rw->lock); __asm__ __volatile__( +" .syntax unified\n" "1: ldrex %0, [%2]\n" " adds %0, %0, #1\n" " strexpl %1, %0, [%2]\n" WFE("mi") -" rsbpls %0, %1, #0\n" +" rsbspl %0, %1, #0\n" " bmi 1b" : "=&r" (tmp), "=&r" (tmp2) : "r" (&rw->lock) diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h index 452bbdcbcc83..506314265c6f 100644 --- a/arch/arm/include/asm/suspend.h +++ b/arch/arm/include/asm/suspend.h @@ -10,6 +10,7 @@ struct sleep_save_sp { }; extern void cpu_resume(void); +extern void cpu_resume_no_hyp(void); extern void cpu_resume_arm(void); extern int cpu_suspend(unsigned long, int (*)(unsigned long)); diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index ae5a0df5316e..dff49845eb87 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -85,7 +85,8 @@ static inline void set_fs(mm_segment_t fs) #define __range_ok(addr, size) ({ \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ - __asm__("adds %1, %2, %3; sbcccs %1, %1, %0; movcc %0, #0" \ + __asm__(".syntax unified\n" \ + "adds %1, %2, 
%3; sbcscc %1, %1, %0; movcc %0, #0" \ : "=&r" (flag), "=&r" (roksum) \ : "r" (addr), "Ir" (size), "0" (current_thread_info()->addr_limit) \ : "cc"); \ diff --git a/arch/arm/include/asm/v7m.h b/arch/arm/include/asm/v7m.h index 187ccf6496ad..2cb00d15831b 100644 --- a/arch/arm/include/asm/v7m.h +++ b/arch/arm/include/asm/v7m.h @@ -49,7 +49,7 @@ * (0 -> msp; 1 -> psp). Bits [1:0] are fixed to 0b01. */ #define EXC_RET_STACK_MASK 0x00000004 -#define EXC_RET_THREADMODE_PROCESSSTACK 0xfffffffd +#define EXC_RET_THREADMODE_PROCESSSTACK (3 << 2) /* Cache related definitions */ diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h index ef5dfedacd8d..628c336e8e3b 100644 --- a/arch/arm/include/asm/vfpmacros.h +++ b/arch/arm/include/asm/vfpmacros.h @@ -29,13 +29,13 @@ ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPD32 - ldcnel p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + ldclne p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? - ldceql p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} + ldcleq p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif #endif @@ -53,13 +53,13 @@ ldr \tmp, =elf_hwcap @ may not have MVFR regs ldr \tmp, [\tmp, #0] tst \tmp, #HWCAP_VFPD32 - stcnel p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + stclne p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} addeq \base, \base, #32*4 @ step over unused register space #else VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? - stceql p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} + stcleq p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif #endif diff --git a/arch/arm/include/debug/tegra.S b/arch/arm/include/debug/tegra.S index 3bc80599c022..4a5a645c76e2 100644 --- a/arch/arm/include/debug/tegra.S +++ b/arch/arm/include/debug/tegra.S @@ -173,7 +173,7 @@ .macro senduart, rd, rx cmp \rx, #0 - strneb \rd, [\rx, #UART_TX << UART_SHIFT] + strbne \rd, [\rx, #UART_TX << UART_SHIFT] 1001: .endm diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index b795dc2408c0..b9f94e03d916 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -86,7 +86,7 @@ hexbuf_rel: .long hexbuf_addr - . ENTRY(printascii) addruart_current r3, r1, r2 1: teq r0, #0 - ldrneb r1, [r0], #1 + ldrbne r1, [r0], #1 teqne r1, #0 reteq lr 2: teq r1, #'\n' diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index e85a3af9ddeb..ce4aea57130a 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -636,7 +636,7 @@ call_fpe: @ Test if we need to give access to iWMMXt coprocessors ldr r5, [r10, #TI_FLAGS] rsbs r7, r8, #(1 << 8) @ CP 0 or 1 only - movcss r7, r5, lsr #(TIF_USING_IWMMXT + 1) + movscs r7, r5, lsr #(TIF_USING_IWMMXT + 1) bcs iwmmxt_task_enable #endif ARM( add pc, pc, r8, lsr #6 ) @@ -872,7 +872,7 @@ __kuser_cmpxchg64: @ 0xffff0f60 smp_dmb arm 1: ldrexd r0, r1, [r2] @ load current val eors r3, r0, r4 @ compare with oldval (1) - eoreqs r3, r1, r5 @ compare with oldval (2) + eorseq r3, r1, r5 @ compare with oldval (2) strexdeq r3, r6, r7, [r2] @ store newval if eq teqeq r3, #1 @ success? 
beq 1b @ if no then retry @@ -896,8 +896,8 @@ __kuser_cmpxchg64: @ 0xffff0f60 ldmia r1, {r6, lr} @ load new val 1: ldmia r2, {r0, r1} @ load current val eors r3, r0, r4 @ compare with oldval (1) - eoreqs r3, r1, r5 @ compare with oldval (2) -2: stmeqia r2, {r6, lr} @ store newval if eq + eorseq r3, r1, r5 @ compare with oldval (2) +2: stmiaeq r2, {r6, lr} @ store newval if eq rsbs r0, r3, #0 @ set return val and C flag ldmfd sp!, {r4, r5, r6, pc} @@ -911,7 +911,7 @@ kuser_cmpxchg64_fixup: mov r7, #0xffff0fff sub r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64))) subs r8, r4, r7 - rsbcss r8, r8, #(2b - 1b) + rsbscs r8, r8, #(2b - 1b) strcs r7, [sp, #S_PC] #if __LINUX_ARM_ARCH__ < 6 bcc kuser_cmpxchg32_fixup @@ -969,7 +969,7 @@ kuser_cmpxchg32_fixup: mov r7, #0xffff0fff sub r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg))) subs r8, r4, r7 - rsbcss r8, r8, #(2b - 1b) + rsbscs r8, r8, #(2b - 1b) strcs r7, [sp, #S_PC] ret lr .previous diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 0465d65d23de..f7649adef505 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -373,7 +373,7 @@ sys_syscall: movhs scno, #0 csdb #endif - stmloia sp, {r5, r6} @ shuffle args + stmialo sp, {r5, r6} @ shuffle args movlo r0, r1 movlo r1, r2 movlo r2, r3 diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 773424843d6e..32051ec5b33f 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -127,7 +127,8 @@ */ .macro v7m_exception_slow_exit ret_r0 cpsid i - ldr lr, =EXC_RET_THREADMODE_PROCESSSTACK + ldr lr, =exc_ret + ldr lr, [lr] @ read original r12, sp, lr, pc and xPSR add r12, sp, #S_IP @@ -387,8 +388,8 @@ badr lr, \ret @ return address .if \reload add r1, sp, #S_R0 + S_OFF @ pointer to regs - ldmccia r1, {r0 - r6} @ reload r0-r6 - stmccia sp, {r4, r5} @ update stack arguments + ldmiacc r1, {r0 - r6} @ reload r0-r6 + stmiacc sp, {r4, r5} @ update stack arguments .endif ldrcc pc, [\table, \tmp, lsl #2] @ call sys_* routine #else @@ -396,8 +397,8 @@ badr lr, \ret @ return address .if \reload add r1, sp, #S_R0 + S_OFF @ pointer to regs - ldmccia r1, {r0 - r6} @ reload r0-r6 - stmccia sp, {r4, r5} @ update stack arguments + ldmiacc r1, {r0 - r6} @ reload r0-r6 + stmiacc sp, {r4, r5} @ update stack arguments .endif ldrcc pc, [\table, \nr, lsl #2] @ call sys_* routine #endif diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S index abcf47848525..19d2dcd6530d 100644 --- a/arch/arm/kernel/entry-v7m.S +++ b/arch/arm/kernel/entry-v7m.S @@ -146,3 +146,7 @@ ENTRY(vector_table) .rept CONFIG_CPU_V7M_NUM_IRQ .long __irq_entry @ External Interrupts .endr + .align 2 + .globl exc_ret +exc_ret: + .space 4 diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index ec29de250076..c08d2d890f7b 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -439,8 +439,8 @@ M_CLASS(str r6, [r12, #PMSAv8_RLAR_A(3)]) str r5, [r12, #PMSAv8_RBAR_A(0)] str r6, [r12, #PMSAv8_RLAR_A(0)] #else - mcr p15, 0, r5, c6, c10, 1 @ PRBAR4 - mcr p15, 0, r6, c6, c10, 2 @ PRLAR4 + mcr p15, 0, r5, c6, c10, 0 @ PRBAR4 + mcr p15, 0, r6, c6, c10, 1 @ PRLAR4 #endif #endif ret lr diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 60146e32619a..82a942894fc0 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -180,8 +180,8 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE @ Check whether GICv3 system registers are available 
mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 ubfx r7, r7, #28, #4 - cmp r7, #1 - bne 2f + teq r7, #0 + beq 2f @ Enable system register accesses mrc p15, 4, r7, c12, c9, 5 @ ICC_HSRE diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index dd2eb5f76b9f..76300f3813e8 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -91,8 +91,11 @@ void machine_crash_nonpanic_core(void *unused) set_cpu_online(smp_processor_id(), false); atomic_dec(&waiting_for_crash_ipi); - while (1) + + while (1) { cpu_relax(); + wfe(); + } } void crash_smp_send_stop(void) diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index a50dc00d79a2..d0a05a3bdb96 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -16,7 +16,7 @@ struct patch { unsigned int insn; }; -static DEFINE_SPINLOCK(patch_lock); +static DEFINE_RAW_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) __acquires(&patch_lock) @@ -33,7 +33,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) return addr; if (flags) - spin_lock_irqsave(&patch_lock, *flags); + raw_spin_lock_irqsave(&patch_lock, *flags); else __acquire(&patch_lock); @@ -48,7 +48,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) clear_fixmap(fixmap); if (flags) - spin_unlock_irqrestore(&patch_lock, *flags); + raw_spin_unlock_irqrestore(&patch_lock, *flags); else __release(&patch_lock); } diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index a8257fc9cf2a..5dc8b80bb693 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -120,6 +120,14 @@ ENDPROC(cpu_resume_after_mmu) .text .align +#ifdef CONFIG_MCPM + .arm +THUMB( .thumb ) +ENTRY(cpu_resume_no_hyp) +ARM_BE8(setend be) @ ensure we are in BE mode + b no_hyp +#endif + #ifdef CONFIG_MMU .arm ENTRY(cpu_resume_arm) @@ -135,6 +143,7 @@ ARM_BE8(setend be) @ ensure we are in BE mode bl __hyp_stub_install_secondary #endif safe_svcmode_maskall r1 +no_hyp: mov r1, #0 ALT_SMP(mrc p15, 0, r0, c0, c0, 5) ALT_UP_B(1f) @@ -164,6 +173,9 @@ ENDPROC(cpu_resume) #ifdef CONFIG_MMU ENDPROC(cpu_resume_arm) #endif +#ifdef CONFIG_MCPM +ENDPROC(cpu_resume_no_hyp) +#endif .align 2 _sleep_save_sp: diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 1d6f5ea522f4..facd4240ca02 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -62,12 +62,6 @@ */ struct secondary_data secondary_data; -/* - * control for which core is the next to come out of the secondary - * boot "holding pen" - */ -volatile int pen_release = -1; - enum ipi_msg_type { IPI_WAKEUP, IPI_TIMER, @@ -604,8 +598,10 @@ static void ipi_cpu_stop(unsigned int cpu) local_fiq_disable(); local_irq_disable(); - while (1) + while (1) { cpu_relax(); + wfe(); + } } static DEFINE_PER_CPU(struct completion *, cpu_completion); diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index b30eafeef096..3cdc399b9fc3 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -100,8 +100,6 @@ static void twd_timer_stop(void) disable_percpu_irq(clk->irq); } -#ifdef CONFIG_COMMON_CLK - /* * Updates clockevent frequency when the cpu frequency changes. * Called on the cpu that is changing frequency with interrupts disabled. @@ -143,54 +141,6 @@ static int twd_clk_init(void) } core_initcall(twd_clk_init); -#elif defined (CONFIG_CPU_FREQ) - -#include <linux/cpufreq.h> - -/* - * Updates clockevent frequency when the cpu frequency changes. 
- * Called on the cpu that is changing frequency with interrupts disabled. - */ -static void twd_update_frequency(void *data) -{ - twd_timer_rate = clk_get_rate(twd_clk); - - clockevents_update_freq(raw_cpu_ptr(twd_evt), twd_timer_rate); -} - -static int twd_cpufreq_transition(struct notifier_block *nb, - unsigned long state, void *data) -{ - struct cpufreq_freqs *freqs = data; - - /* - * The twd clock events must be reprogrammed to account for the new - * frequency. The timer is local to a cpu, so cross-call to the - * changing cpu. - */ - if (state == CPUFREQ_POSTCHANGE) - smp_call_function_single(freqs->cpu, twd_update_frequency, - NULL, 1); - - return NOTIFY_OK; -} - -static struct notifier_block twd_cpufreq_nb = { - .notifier_call = twd_cpufreq_transition, -}; - -static int twd_cpufreq_init(void) -{ - if (twd_evt && raw_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) - return cpufreq_register_notifier(&twd_cpufreq_nb, - CPUFREQ_TRANSITION_NOTIFIER); - - return 0; -} -core_initcall(twd_cpufreq_init); - -#endif - static void twd_calibrate_rate(void) { unsigned long count; @@ -366,21 +316,6 @@ out_free: return err; } -int __init twd_local_timer_register(struct twd_local_timer *tlt) -{ - if (twd_base || twd_evt) - return -EBUSY; - - twd_ppi = tlt->res[1].start; - - twd_base = ioremap(tlt->res[0].start, resource_size(&tlt->res[0])); - if (!twd_base) - return -ENOMEM; - - return twd_local_timer_common_register(NULL); -} - -#ifdef CONFIG_OF static int __init twd_local_timer_of_register(struct device_node *np) { int err; @@ -406,4 +341,3 @@ out: TIMER_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); TIMER_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); TIMER_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); -#endif diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 0bee233fef9a..314cfb232a63 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[]; static const struct unwind_idx *__origin_unwind_idx; extern const struct unwind_idx __stop_unwind_idx[]; -static DEFINE_SPINLOCK(unwind_lock); +static DEFINE_RAW_SPINLOCK(unwind_lock); static LIST_HEAD(unwind_tables); /* Convert a prel31 symbol to an absolute address */ @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) /* module unwind tables */ struct unwind_table *table; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_for_each_entry(table, &unwind_tables, list) { if (addr >= table->begin_addr && addr < table->end_addr) { @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) break; } } - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); } pr_debug("%s: idx = %p\n", __func__, idx); @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, tab->begin_addr = text_addr; tab->end_addr = text_addr + text_size; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_add_tail(&tab->list, &unwind_tables); - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); return tab; } @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab) if (!tab) return; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_del(&tab->list); - 
spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); kfree(tab); } diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 48de846f2246..531e59f5be9c 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -8,9 +8,8 @@ ifeq ($(plus_virt),+virt) plus_virt_def := -DREQUIRES_VIRT=1 endif -ccflags-y += -Iarch/arm/kvm -Ivirt/kvm/arm/vgic -CFLAGS_arm.o := -I. $(plus_virt_def) -CFLAGS_mmu.o := -I. +ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic +CFLAGS_arm.o := $(plus_virt_def) AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index e8bd288fd5be..14915c78bd99 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -293,15 +293,16 @@ static bool access_cntp_tval(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { - u64 now = kvm_phys_timer_read(); - u64 val; + u32 val; if (p->is_write) { val = *vcpu_reg(vcpu, p->Rt1); - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now); + kvm_arm_timer_write_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_TVAL, val); } else { - val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); - *vcpu_reg(vcpu, p->Rt1) = val - now; + val = kvm_arm_timer_read_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_TVAL); + *vcpu_reg(vcpu, p->Rt1) = val; } return true; @@ -315,9 +316,11 @@ static bool access_cntp_ctl(struct kvm_vcpu *vcpu, if (p->is_write) { val = *vcpu_reg(vcpu, p->Rt1); - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val); + kvm_arm_timer_write_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_CTL, val); } else { - val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL); + val = kvm_arm_timer_read_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_CTL); *vcpu_reg(vcpu, p->Rt1) = val; } @@ -333,9 +336,11 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu, if (p->is_write) { val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32; val |= *vcpu_reg(vcpu, p->Rt1); - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val); + kvm_arm_timer_write_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_CVAL, val); } else { - val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); + val = kvm_arm_timer_read_sysreg(vcpu, + TIMER_PTIMER, TIMER_REG_CVAL); *vcpu_reg(vcpu, p->Rt1) = val; *vcpu_reg(vcpu, p->Rt2) = val >> 32; } diff --git a/arch/arm/kvm/hyp/cp15-sr.c b/arch/arm/kvm/hyp/cp15-sr.c index c4782812714c..8bf895ec6e04 100644 --- a/arch/arm/kvm/hyp/cp15-sr.c +++ b/arch/arm/kvm/hyp/cp15-sr.c @@ -27,7 +27,6 @@ static u64 *cp15_64(struct kvm_cpu_context *ctxt, int idx) void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) { - ctxt->cp15[c0_MPIDR] = read_sysreg(VMPIDR); ctxt->cp15[c0_CSSELR] = read_sysreg(CSSELR); ctxt->cp15[c1_SCTLR] = read_sysreg(SCTLR); ctxt->cp15[c1_CPACR] = read_sysreg(CPACR); diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S index aa3f9a9837ac..6ed3cf23fe89 100644 --- a/arch/arm/kvm/hyp/hyp-entry.S +++ b/arch/arm/kvm/hyp/hyp-entry.S @@ -176,7 +176,7 @@ THUMB( orr lr, lr, #PSR_T_BIT ) msr spsr_cxsf, lr ldr lr, =panic msr ELR_hyp, lr - ldr lr, =kvm_call_hyp + ldr lr, =__kvm_call_hyp clrex eret ENDPROC(__hyp_do_panic) diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index acf1c37fa49c..3b058a5d7c5f 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -77,7 +77,7 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) { 
struct kvm *kvm = kern_hyp_va(vcpu->kvm); - write_sysreg(kvm->arch.vttbr, VTTBR); + write_sysreg(kvm_get_vttbr(kvm), VTTBR); write_sysreg(vcpu->arch.midr, VPIDR); } diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c index c0edd450e104..8e4afba73635 100644 --- a/arch/arm/kvm/hyp/tlb.c +++ b/arch/arm/kvm/hyp/tlb.c @@ -41,7 +41,7 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, VTTBR); + write_sysreg(kvm_get_vttbr(kvm), VTTBR); isb(); write_sysreg(0, TLBIALLIS); @@ -61,7 +61,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* Switch to requested VMID */ - write_sysreg(kvm->arch.vttbr, VTTBR); + write_sysreg(kvm_get_vttbr(kvm), VTTBR); isb(); write_sysreg(0, TLBIALL); diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 80a1d6cd261c..a08e6419ebe9 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -42,7 +42,7 @@ * r12: caller save * rest: callee save */ -ENTRY(kvm_call_hyp) +ENTRY(__kvm_call_hyp) hvc #0 bx lr -ENDPROC(kvm_call_hyp) +ENDPROC(__kvm_call_hyp) diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index ad25fd1872c7..0bff0176db2c 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -39,7 +39,7 @@ $(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S ifeq ($(CONFIG_KERNEL_MODE_NEON),y) - NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon + NEON_FLAGS := -march=armv7-a -mfloat-abi=softfp -mfpu=neon CFLAGS_xor-neon.o += $(NEON_FLAGS) obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o endif diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index 93cddab73072..95bd35991288 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -7,7 +7,7 @@ ENTRY( \name ) UNWIND( .fnstart ) ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned + strbne r1, [ip] @ assert word-aligned mov r2, #1 and r3, r0, #31 @ Get bit offset mov r0, r0, lsr #5 @@ -32,7 +32,7 @@ ENDPROC(\name ) ENTRY( \name ) UNWIND( .fnstart ) ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned + strbne r1, [ip] @ assert word-aligned mov r2, #1 and r3, r0, #31 @ Get bit offset mov r0, r0, lsr #5 @@ -62,7 +62,7 @@ ENDPROC(\name ) ENTRY( \name ) UNWIND( .fnstart ) ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned + strbne r1, [ip] @ assert word-aligned and r2, r0, #31 mov r0, r0, lsr #5 mov r3, #1 @@ -89,7 +89,7 @@ ENDPROC(\name ) ENTRY( \name ) UNWIND( .fnstart ) ands ip, r1, #3 - strneb r1, [ip] @ assert word-aligned + strbne r1, [ip] @ assert word-aligned and r3, r0, #31 mov r0, r0, lsr #5 save_and_disable_irqs ip diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index e936352ccb00..55946e3fa2ba 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -44,7 +44,7 @@ UNWIND(.save {r1, lr}) strusr r2, r0, 1, ne, rept=2 tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1 it ne @ explicit IT needed for the label -USER( strnebt r2, [r0]) +USER( strbtne r2, [r0]) mov r0, #0 ldmfd sp!, {r1, pc} UNWIND(.fnend) diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 0d4c189c7f4f..6a3419e2c6d8 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -91,7 +91,7 @@ .endm .macro str1b ptr reg cond=al abort - str\cond\()b \reg, [\ptr], #1 + strb\cond \reg, [\ptr], #1 .endm .macro enter reg1 reg2 diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S index 
6ee2f6706f86..b84ce1792043 100644 --- a/arch/arm/lib/copy_page.S +++ b/arch/arm/lib/copy_page.S @@ -39,9 +39,9 @@ ENTRY(copy_page) .endr subs r2, r2, #1 @ 1 stmia r0!, {r3, r4, ip, lr} @ 4 - ldmgtia r1!, {r3, r4, ip, lr} @ 4 + ldmiagt r1!, {r3, r4, ip, lr} @ 4 bgt 1b @ 1 - PLD( ldmeqia r1!, {r3, r4, ip, lr} ) + PLD( ldmiaeq r1!, {r3, r4, ip, lr} ) PLD( beq 2b ) ldmfd sp!, {r4, pc} @ 3 ENDPROC(copy_page) diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 652e4d98cd47..a11f2c25e03a 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -99,7 +99,7 @@ CALGN( ands ip, r0, #31 ) CALGN( rsb r3, ip, #32 ) - CALGN( sbcnes r4, r3, r2 ) @ C is always set here + CALGN( sbcsne r4, r3, r2 ) @ C is always set here CALGN( bcs 2f ) CALGN( adr r4, 6f ) CALGN( subs r2, r2, r3 ) @ C gets set @@ -204,7 +204,7 @@ CALGN( ands ip, r0, #31 ) CALGN( rsb ip, ip, #32 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( sbcsne r4, ip, r2 ) @ C is always set here CALGN( subcc r2, r2, ip ) CALGN( bcc 15f ) @@ -241,7 +241,7 @@ orr r9, r9, ip, lspush #\push mov ip, ip, lspull #\pull orr ip, ip, lr, lspush #\push - str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f + str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f bge 12b PLD( cmn r2, #96 ) PLD( bge 13b ) diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 97a6ff4b7e3c..c7d08096e354 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -49,7 +49,7 @@ .endm .macro ldr1b ptr reg cond=al abort - ldr\cond\()b \reg, [\ptr], #1 + ldrb\cond \reg, [\ptr], #1 .endm #ifdef CONFIG_CPU_USE_DOMAINS diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S index 984e0f29d548..bd84e2db353b 100644 --- a/arch/arm/lib/csumpartial.S +++ b/arch/arm/lib/csumpartial.S @@ -40,9 +40,9 @@ td3 .req lr /* we must have at least one byte. */ tst buf, #1 @ odd address? movne sum, sum, ror #8 - ldrneb td0, [buf], #1 + ldrbne td0, [buf], #1 subne len, len, #1 - adcnes sum, sum, td0, put_byte_1 + adcsne sum, sum, td0, put_byte_1 .Lless4: tst len, #6 beq .Lless8_byte @@ -68,8 +68,8 @@ td3 .req lr bne .Lless8_wordlp .Lless8_byte: tst len, #1 @ odd number of bytes - ldrneb td0, [buf], #1 @ include last byte - adcnes sum, sum, td0, put_byte_0 @ update checksum + ldrbne td0, [buf], #1 @ include last byte + adcsne sum, sum, td0, put_byte_0 @ update checksum .Ldone: adc r0, sum, #0 @ collect up the last carry ldr td0, [sp], #4 @@ -78,17 +78,17 @@ td3 .req lr ldr pc, [sp], #4 @ return .Lnot_aligned: tst buf, #1 @ odd address - ldrneb td0, [buf], #1 @ make even + ldrbne td0, [buf], #1 @ make even subne len, len, #1 - adcnes sum, sum, td0, put_byte_1 @ update checksum + adcsne sum, sum, td0, put_byte_1 @ update checksum tst buf, #2 @ 32-bit aligned? 
#if __LINUX_ARM_ARCH__ >= 4 - ldrneh td0, [buf], #2 @ make 32-bit aligned + ldrhne td0, [buf], #2 @ make 32-bit aligned subne len, len, #2 #else - ldrneb td0, [buf], #1 - ldrneb ip, [buf], #1 + ldrbne td0, [buf], #1 + ldrbne ip, [buf], #1 subne len, len, #2 #ifndef __ARMEB__ orrne td0, td0, ip, lsl #8 @@ -96,7 +96,7 @@ td3 .req lr orrne td0, ip, td0, lsl #8 #endif #endif - adcnes sum, sum, td0 @ update checksum + adcsne sum, sum, td0 @ update checksum ret lr ENTRY(csum_partial) diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index 10b45909610c..08e17758cbea 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -148,9 +148,9 @@ FN_ENTRY strb r5, [dst], #1 mov r5, r4, get_byte_2 .Lexit: tst len, #1 - strneb r5, [dst], #1 + strbne r5, [dst], #1 andne r5, r5, #255 - adcnes sum, sum, r5, put_byte_0 + adcsne sum, sum, r5, put_byte_0 /* * If the dst pointer was not 16-bit aligned, we diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index b83fdc06286a..f4716d98e0b4 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -95,7 +95,7 @@ add r2, r2, r1 mov r0, #0 @ zero the buffer 9002: teq r2, r1 - strneb r0, [r1], #1 + strbne r0, [r1], #1 bne 9002b load_regs .popsection diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S index a9eafe4981eb..4d80f690c48b 100644 --- a/arch/arm/lib/div64.S +++ b/arch/arm/lib/div64.S @@ -88,8 +88,8 @@ UNWIND(.fnstart) @ Break out early if dividend reaches 0. 2: cmp xh, yl orrcs yh, yh, ip - subcss xh, xh, yl - movnes ip, ip, lsr #1 + subscs xh, xh, yl + movsne ip, ip, lsr #1 mov yl, yl, lsr #1 bne 2b diff --git a/arch/arm/lib/floppydma.S b/arch/arm/lib/floppydma.S index 617150b1baef..de68d3b343e3 100644 --- a/arch/arm/lib/floppydma.S +++ b/arch/arm/lib/floppydma.S @@ -14,8 +14,8 @@ .global floppy_fiqin_end ENTRY(floppy_fiqin_start) subs r9, r9, #1 - ldrgtb r12, [r11, #-4] - ldrleb r12, [r11], #0 + ldrbgt r12, [r11, #-4] + ldrble r12, [r11], #0 strb r12, [r10], #1 subs pc, lr, #4 floppy_fiqin_end: @@ -23,10 +23,10 @@ floppy_fiqin_end: .global floppy_fiqout_end ENTRY(floppy_fiqout_start) subs r9, r9, #1 - ldrgeb r12, [r10], #1 + ldrbge r12, [r10], #1 movlt r12, #0 - strleb r12, [r11], #0 - subles pc, lr, #4 + strble r12, [r11], #0 + subsle pc, lr, #4 strb r12, [r11, #-4] subs pc, lr, #4 floppy_fiqout_end: diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S index c31b2f3153f1..91038a0a77b5 100644 --- a/arch/arm/lib/io-readsb.S +++ b/arch/arm/lib/io-readsb.S @@ -16,10 +16,10 @@ cmp ip, #2 ldrb r3, [r0] strb r3, [r1], #1 - ldrgeb r3, [r0] - strgeb r3, [r1], #1 - ldrgtb r3, [r0] - strgtb r3, [r1], #1 + ldrbge r3, [r0] + strbge r3, [r1], #1 + ldrbgt r3, [r0] + strbgt r3, [r1], #1 subs r2, r2, ip bne .Linsb_aligned @@ -72,7 +72,7 @@ ENTRY(__raw_readsb) bpl .Linsb_16_lp tst r2, #15 - ldmeqfd sp!, {r4 - r6, pc} + ldmfdeq sp!, {r4 - r6, pc} .Linsb_no_16: tst r2, #8 beq .Linsb_no_8 @@ -109,15 +109,15 @@ ENTRY(__raw_readsb) str r3, [r1], #4 .Linsb_no_4: ands r2, r2, #3 - ldmeqfd sp!, {r4 - r6, pc} + ldmfdeq sp!, {r4 - r6, pc} cmp r2, #2 ldrb r3, [r0] strb r3, [r1], #1 - ldrgeb r3, [r0] - strgeb r3, [r1], #1 - ldrgtb r3, [r0] - strgtb r3, [r1] + ldrbge r3, [r0] + strbge r3, [r1], #1 + ldrbgt r3, [r0] + strbgt r3, [r1] ldmfd sp!, {r4 - r6, pc} ENDPROC(__raw_readsb) diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 2ed86fa5465f..f2e2064318d2 100644 --- a/arch/arm/lib/io-readsl.S +++ 
b/arch/arm/lib/io-readsl.S @@ -30,7 +30,7 @@ ENTRY(__raw_readsl) 2: movs r2, r2, lsl #31 ldrcs r3, [r0, #0] ldrcs ip, [r0, #0] - stmcsia r1!, {r3, ip} + stmiacs r1!, {r3, ip} ldrne r3, [r0, #0] strne r3, [r1, #0] ret lr diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S index 413da9914529..8b25b69c516e 100644 --- a/arch/arm/lib/io-readsw-armv3.S +++ b/arch/arm/lib/io-readsw-armv3.S @@ -68,7 +68,7 @@ ENTRY(__raw_readsw) bpl .Linsw_8_lp tst r2, #7 - ldmeqfd sp!, {r4, r5, r6, pc} + ldmfdeq sp!, {r4, r5, r6, pc} .Lno_insw_8: tst r2, #4 beq .Lno_insw_4 @@ -97,9 +97,9 @@ ENTRY(__raw_readsw) .Lno_insw_2: tst r2, #1 ldrne r3, [r0] - strneb r3, [r1], #1 + strbne r3, [r1], #1 movne r3, r3, lsr #8 - strneb r3, [r1] + strbne r3, [r1] ldmfd sp!, {r4, r5, r6, pc} diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S index d9a45e9692ae..5efdd66f5dcd 100644 --- a/arch/arm/lib/io-readsw-armv4.S +++ b/arch/arm/lib/io-readsw-armv4.S @@ -76,8 +76,8 @@ ENTRY(__raw_readsw) pack r3, r3, ip str r3, [r1], #4 -.Lno_insw_2: ldrneh r3, [r0] - strneh r3, [r1] +.Lno_insw_2: ldrhne r3, [r0] + strhne r3, [r1] ldmfd sp!, {r4, r5, pc} @@ -94,7 +94,7 @@ ENTRY(__raw_readsw) #endif .Linsw_noalign: stmfd sp!, {r4, lr} - ldrccb ip, [r1, #-1]! + ldrbcc ip, [r1, #-1]! bcc 1f ldrh ip, [r0] @@ -121,11 +121,11 @@ ENTRY(__raw_readsw) 3: tst r2, #1 strb ip, [r1], #1 - ldrneh ip, [r0] + ldrhne ip, [r0] _BE_ONLY_( movne ip, ip, ror #8 ) - strneb ip, [r1], #1 + strbne ip, [r1], #1 _LE_ONLY_( movne ip, ip, lsr #8 ) _BE_ONLY_( movne ip, ip, lsr #24 ) - strneb ip, [r1] + strbne ip, [r1] ldmfd sp!, {r4, pc} ENDPROC(__raw_readsw) diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S index a46bbc9b168b..7d2881a2381e 100644 --- a/arch/arm/lib/io-writesb.S +++ b/arch/arm/lib/io-writesb.S @@ -36,10 +36,10 @@ cmp ip, #2 ldrb r3, [r1], #1 strb r3, [r0] - ldrgeb r3, [r1], #1 - strgeb r3, [r0] - ldrgtb r3, [r1], #1 - strgtb r3, [r0] + ldrbge r3, [r1], #1 + strbge r3, [r0] + ldrbgt r3, [r1], #1 + strbgt r3, [r0] subs r2, r2, ip bne .Loutsb_aligned @@ -64,7 +64,7 @@ ENTRY(__raw_writesb) bpl .Loutsb_16_lp tst r2, #15 - ldmeqfd sp!, {r4, r5, pc} + ldmfdeq sp!, {r4, r5, pc} .Loutsb_no_16: tst r2, #8 beq .Loutsb_no_8 @@ -80,15 +80,15 @@ ENTRY(__raw_writesb) outword r3 .Loutsb_no_4: ands r2, r2, #3 - ldmeqfd sp!, {r4, r5, pc} + ldmfdeq sp!, {r4, r5, pc} cmp r2, #2 ldrb r3, [r1], #1 strb r3, [r0] - ldrgeb r3, [r1], #1 - strgeb r3, [r0] - ldrgtb r3, [r1] - strgtb r3, [r0] + ldrbge r3, [r1], #1 + strbge r3, [r0] + ldrbgt r3, [r1] + strbgt r3, [r0] ldmfd sp!, {r4, r5, pc} ENDPROC(__raw_writesb) diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index 4ea2435988c1..7596ac0c90b0 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -28,7 +28,7 @@ ENTRY(__raw_writesl) bpl 1b ldmfd sp!, {r4, lr} 2: movs r2, r2, lsl #31 - ldmcsia r1!, {r3, ip} + ldmiacs r1!, {r3, ip} strcs r3, [r0, #0] ldrne r3, [r1, #0] strcs ip, [r0, #0] diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S index 121789eb6802..cb94b9b49405 100644 --- a/arch/arm/lib/io-writesw-armv3.S +++ b/arch/arm/lib/io-writesw-armv3.S @@ -79,7 +79,7 @@ ENTRY(__raw_writesw) bpl .Loutsw_8_lp tst r2, #7 - ldmeqfd sp!, {r4, r5, r6, pc} + ldmfdeq sp!, {r4, r5, r6, pc} .Lno_outsw_8: tst r2, #4 beq .Lno_outsw_4 diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S index 269f90c51ad2..e6645b2f249e 100644 --- a/arch/arm/lib/io-writesw-armv4.S +++ 
b/arch/arm/lib/io-writesw-armv4.S @@ -61,8 +61,8 @@ ENTRY(__raw_writesw) ldr r3, [r1], #4 outword r3 -.Lno_outsw_2: ldrneh r3, [r1] - strneh r3, [r0] +.Lno_outsw_2: ldrhne r3, [r1] + strhne r3, [r0] ldmfd sp!, {r4, r5, pc} @@ -95,6 +95,6 @@ ENTRY(__raw_writesw) tst r2, #1 3: movne ip, r3, lsr #8 - strneh ip, [r0] + strhne ip, [r0] ret lr ENDPROC(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index 9397b2e532af..c23f9d9e2970 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -96,7 +96,7 @@ Boston, MA 02111-1307, USA. */ subhs \dividend, \dividend, \divisor, lsr #3 orrhs \result, \result, \curbit, lsr #3 cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movsne \curbit, \curbit, lsr #4 @ No, any more bits to do? movne \divisor, \divisor, lsr #4 bne 1b @@ -182,7 +182,7 @@ Boston, MA 02111-1307, USA. */ subhs \dividend, \dividend, \divisor, lsr #3 cmp \dividend, #1 mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 + subsge \order, \order, #4 bge 1b tst \order, #3 diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 64111bd4440b..4a6997bb4404 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -30,7 +30,7 @@ .endm .macro ldr1b ptr reg cond=al abort - ldr\cond\()b \reg, [\ptr], #1 + ldrb\cond \reg, [\ptr], #1 .endm .macro str1w ptr reg abort @@ -42,7 +42,7 @@ .endm .macro str1b ptr reg cond=al abort - str\cond\()b \reg, [\ptr], #1 + strb\cond \reg, [\ptr], #1 .endm .macro enter reg1 reg2 diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index 69a9d47fc5ab..d70304cb2cd0 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -59,7 +59,7 @@ ENTRY(memmove) blt 5f CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( sbcsne r4, ip, r2 ) @ C is always set here CALGN( bcs 2f ) CALGN( adr r4, 6f ) CALGN( subs r2, r2, ip ) @ C is set here @@ -114,20 +114,20 @@ ENTRY(memmove) UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block 8: movs r2, r2, lsl #31 - ldrneb r3, [r1, #-1]! - ldrcsb r4, [r1, #-1]! - ldrcsb ip, [r1, #-1] - strneb r3, [r0, #-1]! - strcsb r4, [r0, #-1]! - strcsb ip, [r0, #-1] + ldrbne r3, [r1, #-1]! + ldrbcs r4, [r1, #-1]! + ldrbcs ip, [r1, #-1] + strbne r3, [r0, #-1]! + strbcs r4, [r0, #-1]! + strbcs ip, [r0, #-1] ldmfd sp!, {r0, r4, pc} 9: cmp ip, #2 - ldrgtb r3, [r1, #-1]! - ldrgeb r4, [r1, #-1]! + ldrbgt r3, [r1, #-1]! + ldrbge r4, [r1, #-1]! ldrb lr, [r1, #-1]! - strgtb r3, [r0, #-1]! - strgeb r4, [r0, #-1]! + strbgt r3, [r0, #-1]! + strbge r4, [r0, #-1]! subs r2, r2, ip strb lr, [r0, #-1]! blt 8b @@ -150,7 +150,7 @@ ENTRY(memmove) blt 14f CALGN( ands ip, r0, #31 ) - CALGN( sbcnes r4, ip, r2 ) @ C is always set here + CALGN( sbcsne r4, ip, r2 ) @ C is always set here CALGN( subcc r2, r2, ip ) CALGN( bcc 15f ) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index ed6d35d9cdb5..5593a45e0a8c 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -44,20 +44,20 @@ UNWIND( .save {r8, lr} ) mov lr, r3 2: subs r2, r2, #64 - stmgeia ip!, {r1, r3, r8, lr} @ 64 bytes at a time. - stmgeia ip!, {r1, r3, r8, lr} - stmgeia ip!, {r1, r3, r8, lr} - stmgeia ip!, {r1, r3, r8, lr} + stmiage ip!, {r1, r3, r8, lr} @ 64 bytes at a time. + stmiage ip!, {r1, r3, r8, lr} + stmiage ip!, {r1, r3, r8, lr} + stmiage ip!, {r1, r3, r8, lr} bgt 2b - ldmeqfd sp!, {r8, pc} @ Now <64 bytes to go. + ldmfdeq sp!, {r8, pc} @ Now <64 bytes to go. 
/* * No need to correct the count; we're only testing bits from now on */ tst r2, #32 - stmneia ip!, {r1, r3, r8, lr} - stmneia ip!, {r1, r3, r8, lr} + stmiane ip!, {r1, r3, r8, lr} + stmiane ip!, {r1, r3, r8, lr} tst r2, #16 - stmneia ip!, {r1, r3, r8, lr} + stmiane ip!, {r1, r3, r8, lr} ldmfd sp!, {r8, lr} UNWIND( .fnend ) @@ -87,22 +87,22 @@ UNWIND( .save {r4-r8, lr} ) rsb r8, r8, #32 sub r2, r2, r8 movs r8, r8, lsl #(32 - 4) - stmcsia ip!, {r4, r5, r6, r7} - stmmiia ip!, {r4, r5} + stmiacs ip!, {r4, r5, r6, r7} + stmiami ip!, {r4, r5} tst r8, #(1 << 30) mov r8, r1 strne r1, [ip], #4 3: subs r2, r2, #64 - stmgeia ip!, {r1, r3-r8, lr} - stmgeia ip!, {r1, r3-r8, lr} + stmiage ip!, {r1, r3-r8, lr} + stmiage ip!, {r1, r3-r8, lr} bgt 3b - ldmeqfd sp!, {r4-r8, pc} + ldmfdeq sp!, {r4-r8, pc} tst r2, #32 - stmneia ip!, {r1, r3-r8, lr} + stmiane ip!, {r1, r3-r8, lr} tst r2, #16 - stmneia ip!, {r4-r7} + stmiane ip!, {r4-r7} ldmfd sp!, {r4-r8, lr} UNWIND( .fnend ) @@ -110,7 +110,7 @@ UNWIND( .fnend ) UNWIND( .fnstart ) 4: tst r2, #8 - stmneia ip!, {r1, r3} + stmiane ip!, {r1, r3} tst r2, #4 strne r1, [ip], #4 /* @@ -118,17 +118,17 @@ UNWIND( .fnstart ) * may have an unaligned pointer as well. */ 5: tst r2, #2 - strneb r1, [ip], #1 - strneb r1, [ip], #1 + strbne r1, [ip], #1 + strbne r1, [ip], #1 tst r2, #1 - strneb r1, [ip], #1 + strbne r1, [ip], #1 ret lr 6: subs r2, r2, #4 @ 1 do we have enough blt 5b @ 1 bytes to align with? cmp r3, #2 @ 1 - strltb r1, [ip], #1 @ 1 - strleb r1, [ip], #1 @ 1 + strblt r1, [ip], #1 @ 1 + strble r1, [ip], #1 @ 1 strb r1, [ip], #1 @ 1 add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) b 1b diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index 2c40aeab3eaa..c691b901092f 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -14,7 +14,7 @@ MODULE_LICENSE("GPL"); #ifndef __ARM_NEON__ -#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon' +#error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon' #endif /* diff --git a/arch/arm/mach-actions/platsmp.c b/arch/arm/mach-actions/platsmp.c index 3efaa10efc43..4fd479c948e6 100644 --- a/arch/arm/mach-actions/platsmp.c +++ b/arch/arm/mach-actions/platsmp.c @@ -39,10 +39,6 @@ static void __iomem *sps_base_addr; static void __iomem *timer_base_addr; static int ncores; -static DEFINE_SPINLOCK(boot_lock); - -void owl_secondary_startup(void); - static int s500_wakeup_secondary(unsigned int cpu) { int ret; @@ -84,7 +80,6 @@ static int s500_wakeup_secondary(unsigned int cpu) static int s500_smp_boot_secondary(unsigned int cpu, struct task_struct *idle) { - unsigned long timeout; int ret; ret = s500_wakeup_secondary(cpu); @@ -93,21 +88,11 @@ static int s500_smp_boot_secondary(unsigned int cpu, struct task_struct *idle) udelay(10); - spin_lock(&boot_lock); - smp_send_reschedule(cpu); - timeout = jiffies + (1 * HZ); - while (time_before(jiffies, timeout)) { - if (pen_release == -1) - break; - } - writel(0, timer_base_addr + OWL_CPU1_ADDR + (cpu - 1) * 4); writel(0, timer_base_addr + OWL_CPU1_FLAG + (cpu - 1) * 4); - spin_unlock(&boot_lock); - return 0; } diff --git a/arch/arm/mach-exynos/headsmp.S b/arch/arm/mach-exynos/headsmp.S index 005695c9bf40..0ac2cb9a7355 100644 --- a/arch/arm/mach-exynos/headsmp.S +++ b/arch/arm/mach-exynos/headsmp.S @@ -36,4 +36,4 @@ ENDPROC(exynos4_secondary_startup) .align 2 1: .long . 
- .long pen_release + .long exynos_pen_release diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index b6da7edbbd2f..abcac6164233 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -28,6 +28,9 @@ extern void exynos4_secondary_startup(void); +/* XXX exynos_pen_release is cargo culted code - DO NOT COPY XXX */ +volatile int exynos_pen_release = -1; + #ifdef CONFIG_HOTPLUG_CPU static inline void cpu_leave_lowpower(u32 core_id) { @@ -57,7 +60,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious) wfi(); - if (pen_release == core_id) { + if (exynos_pen_release == core_id) { /* * OK, proper wakeup, we're done */ @@ -228,15 +231,17 @@ void exynos_core_restart(u32 core_id) } /* - * Write pen_release in a way that is guaranteed to be visible to all - * observers, irrespective of whether they're taking part in coherency + * XXX CARGO CULTED CODE - DO NOT COPY XXX + * + * Write exynos_pen_release in a way that is guaranteed to be visible to + * all observers, irrespective of whether they're taking part in coherency * or not. This is necessary for the hotplug code to work reliably. */ -static void write_pen_release(int val) +static void exynos_write_pen_release(int val) { - pen_release = val; + exynos_pen_release = val; smp_wmb(); - sync_cache_w(&pen_release); + sync_cache_w(&exynos_pen_release); } static DEFINE_SPINLOCK(boot_lock); @@ -247,7 +252,7 @@ static void exynos_secondary_init(unsigned int cpu) * let the primary processor know we're out of the * pen, then head off into the C entry point */ - write_pen_release(-1); + exynos_write_pen_release(-1); /* * Synchronise with the boot thread. @@ -322,12 +327,12 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) /* * The secondary processor is waiting to be released from * the holding pen - release it, then wait for it to flag - * that it has been released by resetting pen_release. + * that it has been released by resetting exynos_pen_release. * - * Note that "pen_release" is the hardware CPU core ID, whereas + * Note that "exynos_pen_release" is the hardware CPU core ID, whereas * "cpu" is Linux's internal ID. */ - write_pen_release(core_id); + exynos_write_pen_release(core_id); if (!exynos_cpu_power_state(core_id)) { exynos_cpu_power_up(core_id); @@ -376,13 +381,13 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) else arch_send_wakeup_ipi_mask(cpumask_of(cpu)); - if (pen_release == -1) + if (exynos_pen_release == -1) break; udelay(10); } - if (pen_release != -1) + if (exynos_pen_release != -1) ret = -ETIMEDOUT; /* @@ -392,7 +397,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) fail: spin_unlock(&boot_lock); - return pen_release != -1 ? ret : 0; + return exynos_pen_release != -1 ? 
ret : 0; } static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) diff --git a/arch/arm/mach-ks8695/include/mach/entry-macro.S b/arch/arm/mach-ks8695/include/mach/entry-macro.S index 8315b34f32ff..7ff812cb010b 100644 --- a/arch/arm/mach-ks8695/include/mach/entry-macro.S +++ b/arch/arm/mach-ks8695/include/mach/entry-macro.S @@ -42,6 +42,6 @@ moveq \irqstat, \irqstat, lsr #2 addeq \irqnr, \irqnr, #2 tst \irqstat, #0x01 - addeqs \irqnr, \irqnr, #1 + addseq \irqnr, \irqnr, #1 1001: .endm diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c index 058a37e6d11c..fd6e0671f957 100644 --- a/arch/arm/mach-omap2/prm_common.c +++ b/arch/arm/mach-omap2/prm_common.c @@ -523,8 +523,10 @@ void omap_prm_reset_system(void) prm_ll_data->reset_system(); - while (1) + while (1) { cpu_relax(); + wfe(); + } } /** diff --git a/arch/arm/mach-oxnas/Makefile b/arch/arm/mach-oxnas/Makefile index b625906a9970..61a34e1c0f22 100644 --- a/arch/arm/mach-oxnas/Makefile +++ b/arch/arm/mach-oxnas/Makefile @@ -1,2 +1 @@ obj-$(CONFIG_SMP) += platsmp.o headsmp.o -obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o diff --git a/arch/arm/mach-oxnas/hotplug.c b/arch/arm/mach-oxnas/hotplug.c deleted file mode 100644 index 854f29b8cba6..000000000000 --- a/arch/arm/mach-oxnas/hotplug.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (C) 2002 ARM Ltd. - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/smp.h> - -#include <asm/cp15.h> -#include <asm/smp_plat.h> - -static inline void cpu_enter_lowpower(void) -{ - unsigned int v; - - asm volatile( - " mcr p15, 0, %1, c7, c5, 0\n" - " mcr p15, 0, %1, c7, c10, 4\n" - /* - * Turn off coherency - */ - " mrc p15, 0, %0, c1, c0, 1\n" - " bic %0, %0, #0x20\n" - " mcr p15, 0, %0, c1, c0, 1\n" - " mrc p15, 0, %0, c1, c0, 0\n" - " bic %0, %0, %2\n" - " mcr p15, 0, %0, c1, c0, 0\n" - : "=&r" (v) - : "r" (0), "Ir" (CR_C) - : "cc"); -} - -static inline void cpu_leave_lowpower(void) -{ - unsigned int v; - - asm volatile( "mrc p15, 0, %0, c1, c0, 0\n" - " orr %0, %0, %1\n" - " mcr p15, 0, %0, c1, c0, 0\n" - " mrc p15, 0, %0, c1, c0, 1\n" - " orr %0, %0, #0x20\n" - " mcr p15, 0, %0, c1, c0, 1\n" - : "=&r" (v) - : "Ir" (CR_C) - : "cc"); -} - -static inline void platform_do_lowpower(unsigned int cpu, int *spurious) -{ - /* - * there is no power-control hardware on this platform, so all - * we can do is put the core into WFI; this is safe as the calling - * code will have already disabled interrupts - */ - for (;;) { - /* - * here's the WFI - */ - asm(".word 0xe320f003\n" - : - : - : "memory", "cc"); - - if (pen_release == cpu_logical_map(cpu)) { - /* - * OK, proper wakeup, we're done - */ - break; - } - - /* - * Getting here, means that we have come out of WFI without - * having been woken up - this shouldn't happen - * - * Just note it happening - when we're woken, we can report - * its occurrence. 
- */ - (*spurious)++; - } -} - -/* - * platform-specific code to shutdown a CPU - * - * Called with IRQs disabled - */ -void ox820_cpu_die(unsigned int cpu) -{ - int spurious = 0; - - /* - * we're ready for shutdown now, so do it - */ - cpu_enter_lowpower(); - platform_do_lowpower(cpu, &spurious); - - /* - * bring this CPU back into the world of cache - * coherency, and then restore interrupts - */ - cpu_leave_lowpower(); - - if (spurious) - pr_warn("CPU%u: %u spurious wakeup calls\n", cpu, spurious); -} diff --git a/arch/arm/mach-oxnas/platsmp.c b/arch/arm/mach-oxnas/platsmp.c index 442cc8a2f7dc..735141c0e3a3 100644 --- a/arch/arm/mach-oxnas/platsmp.c +++ b/arch/arm/mach-oxnas/platsmp.c @@ -19,7 +19,6 @@ #include <asm/smp_scu.h> extern void ox820_secondary_startup(void); -extern void ox820_cpu_die(unsigned int cpu); static void __iomem *cpu_ctrl; static void __iomem *gic_cpu_ctrl; @@ -94,9 +93,6 @@ unmap_scu: static const struct smp_operations ox820_smp_ops __initconst = { .smp_prepare_cpus = ox820_smp_prepare_cpus, .smp_boot_secondary = ox820_boot_secondary, -#ifdef CONFIG_HOTPLUG_CPU - .cpu_die = ox820_cpu_die, -#endif }; CPU_METHOD_OF_DECLARE(ox820_smp, "oxsemi,ox820-smp", &ox820_smp_ops); diff --git a/arch/arm/mach-prima2/common.h b/arch/arm/mach-prima2/common.h index 6d77b622d168..457eb7b18160 100644 --- a/arch/arm/mach-prima2/common.h +++ b/arch/arm/mach-prima2/common.h @@ -15,6 +15,8 @@ #include <asm/mach/time.h> #include <asm/exception.h> +extern volatile int prima2_pen_release; + extern const struct smp_operations sirfsoc_smp_ops; extern void sirfsoc_secondary_startup(void); extern void sirfsoc_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-prima2/headsmp.S b/arch/arm/mach-prima2/headsmp.S index 209d9fc5c16c..6cf4fc60347b 100644 --- a/arch/arm/mach-prima2/headsmp.S +++ b/arch/arm/mach-prima2/headsmp.S @@ -34,4 +34,4 @@ ENDPROC(sirfsoc_secondary_startup) .align 1: .long . - .long pen_release + .long prima2_pen_release diff --git a/arch/arm/mach-prima2/hotplug.c b/arch/arm/mach-prima2/hotplug.c index a728c78b996f..b6cf1527e330 100644 --- a/arch/arm/mach-prima2/hotplug.c +++ b/arch/arm/mach-prima2/hotplug.c @@ -11,6 +11,7 @@ #include <linux/smp.h> #include <asm/smp_plat.h> +#include "common.h" static inline void platform_do_lowpower(unsigned int cpu) { @@ -18,7 +19,7 @@ static inline void platform_do_lowpower(unsigned int cpu) for (;;) { __asm__ __volatile__("dsb\n\t" "wfi\n\t" : : : "memory"); - if (pen_release == cpu_logical_map(cpu)) { + if (prima2_pen_release == cpu_logical_map(cpu)) { /* * OK, proper wakeup, we're done */ diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index 75ef5d4be554..d1f8b5168083 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c @@ -24,13 +24,16 @@ static void __iomem *clk_base; static DEFINE_SPINLOCK(boot_lock); +/* XXX prima2_pen_release is cargo culted code - DO NOT COPY XXX */ +volatile int prima2_pen_release = -1; + static void sirfsoc_secondary_init(unsigned int cpu) { /* * let the primary processor know we're out of the * pen, then head off into the C entry point */ - pen_release = -1; + prima2_pen_release = -1; smp_wmb(); /* @@ -80,13 +83,13 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) /* * The secondary processor is waiting to be released from * the holding pen - release it, then wait for it to flag - * that it has been released by resetting pen_release. + * that it has been released by resetting prima2_pen_release. 
* - * Note that "pen_release" is the hardware CPU ID, whereas + * Note that "prima2_pen_release" is the hardware CPU ID, whereas * "cpu" is Linux's internal ID. */ - pen_release = cpu_logical_map(cpu); - sync_cache_w(&pen_release); + prima2_pen_release = cpu_logical_map(cpu); + sync_cache_w(&prima2_pen_release); /* * Send the secondary CPU SEV, thereby causing the boot monitor to read @@ -97,7 +100,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { smp_rmb(); - if (pen_release == -1) + if (prima2_pen_release == -1) break; udelay(10); @@ -109,7 +112,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) */ spin_unlock(&boot_lock); - return pen_release != -1 ? -ENOSYS : 0; + return prima2_pen_release != -1 ? -ENOSYS : 0; } const struct smp_operations sirfsoc_smp_ops __initconst = { diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c index 5494c9e0c909..99a6a5e809e0 100644 --- a/arch/arm/mach-qcom/platsmp.c +++ b/arch/arm/mach-qcom/platsmp.c @@ -46,8 +46,6 @@ extern void secondary_startup_arm(void); -static DEFINE_SPINLOCK(boot_lock); - #ifdef CONFIG_HOTPLUG_CPU static void qcom_cpu_die(unsigned int cpu) { @@ -55,15 +53,6 @@ static void qcom_cpu_die(unsigned int cpu) } #endif -static void qcom_secondary_init(unsigned int cpu) -{ - /* - * Synchronise with the boot thread. - */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); -} - static int scss_release_secondary(unsigned int cpu) { struct device_node *node; @@ -281,24 +270,12 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) } /* - * set synchronisation state between this boot processor - * and the secondary one - */ - spin_lock(&boot_lock); - - /* * Send the secondary CPU a soft interrupt, thereby causing * the boot monitor to read the system wide flags register, * and branch to the address found there. 
*/ arch_send_wakeup_ipi_mask(cpumask_of(cpu)); - /* - * now the secondary core is starting up let it run its - * calibrations, then wait for it to finish - */ - spin_unlock(&boot_lock); - return ret; } @@ -334,7 +311,6 @@ static void __init qcom_smp_prepare_cpus(unsigned int max_cpus) static const struct smp_operations smp_msm8660_ops __initconst = { .smp_prepare_cpus = qcom_smp_prepare_cpus, - .smp_secondary_init = qcom_secondary_init, .smp_boot_secondary = msm8660_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = qcom_cpu_die, @@ -344,7 +320,6 @@ CPU_METHOD_OF_DECLARE(qcom_smp, "qcom,gcc-msm8660", &smp_msm8660_ops); static const struct smp_operations qcom_smp_kpssv1_ops __initconst = { .smp_prepare_cpus = qcom_smp_prepare_cpus, - .smp_secondary_init = qcom_secondary_init, .smp_boot_secondary = kpssv1_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = qcom_cpu_die, @@ -354,7 +329,6 @@ CPU_METHOD_OF_DECLARE(qcom_smp_kpssv1, "qcom,kpss-acc-v1", &qcom_smp_kpssv1_ops) static const struct smp_operations qcom_smp_kpssv2_ops __initconst = { .smp_prepare_cpus = qcom_smp_prepare_cpus, - .smp_secondary_init = qcom_secondary_init, .smp_boot_secondary = kpssv2_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU .cpu_die = qcom_cpu_die, diff --git a/arch/arm/mach-spear/generic.h b/arch/arm/mach-spear/generic.h index 909b97c0b237..25b4c5e66e39 100644 --- a/arch/arm/mach-spear/generic.h +++ b/arch/arm/mach-spear/generic.h @@ -20,6 +20,8 @@ #include <asm/mach/time.h> +extern volatile int spear_pen_release; + extern void spear13xx_timer_init(void); extern void spear3xx_timer_init(void); extern struct pl022_ssp_controller pl022_plat_data; diff --git a/arch/arm/mach-spear/headsmp.S b/arch/arm/mach-spear/headsmp.S index c52192dc3d9f..6e250b6c0aa2 100644 --- a/arch/arm/mach-spear/headsmp.S +++ b/arch/arm/mach-spear/headsmp.S @@ -43,5 +43,5 @@ pen: ldr r7, [r6] .align 1: .long . - .long pen_release + .long spear_pen_release ENDPROC(spear13xx_secondary_startup) diff --git a/arch/arm/mach-spear/hotplug.c b/arch/arm/mach-spear/hotplug.c index 12edd1cf8a12..0dd84f609627 100644 --- a/arch/arm/mach-spear/hotplug.c +++ b/arch/arm/mach-spear/hotplug.c @@ -16,6 +16,8 @@ #include <asm/cp15.h> #include <asm/smp_plat.h> +#include "generic.h" + static inline void cpu_enter_lowpower(void) { unsigned int v; @@ -57,7 +59,7 @@ static inline void spear13xx_do_lowpower(unsigned int cpu, int *spurious) for (;;) { wfi(); - if (pen_release == cpu) { + if (spear_pen_release == cpu) { /* * OK, proper wakeup, we're done */ diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c index 39038a03836a..b1ff4bb86f6d 100644 --- a/arch/arm/mach-spear/platsmp.c +++ b/arch/arm/mach-spear/platsmp.c @@ -20,16 +20,21 @@ #include <mach/spear.h> #include "generic.h" +/* XXX spear_pen_release is cargo culted code - DO NOT COPY XXX */ +volatile int spear_pen_release = -1; + /* - * Write pen_release in a way that is guaranteed to be visible to all - * observers, irrespective of whether they're taking part in coherency + * XXX CARGO CULTED CODE - DO NOT COPY XXX + * + * Write spear_pen_release in a way that is guaranteed to be visible to + * all observers, irrespective of whether they're taking part in coherency * or not. This is necessary for the hotplug code to work reliably. 
*/ -static void write_pen_release(int val) +static void spear_write_pen_release(int val) { - pen_release = val; + spear_pen_release = val; smp_wmb(); - sync_cache_w(&pen_release); + sync_cache_w(&spear_pen_release); } static DEFINE_SPINLOCK(boot_lock); @@ -42,7 +47,7 @@ static void spear13xx_secondary_init(unsigned int cpu) * let the primary processor know we're out of the * pen, then head off into the C entry point */ - write_pen_release(-1); + spear_write_pen_release(-1); /* * Synchronise with the boot thread. @@ -64,17 +69,17 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) /* * The secondary processor is waiting to be released from * the holding pen - release it, then wait for it to flag - * that it has been released by resetting pen_release. + * that it has been released by resetting spear_pen_release. * - * Note that "pen_release" is the hardware CPU ID, whereas + * Note that "spear_pen_release" is the hardware CPU ID, whereas * "cpu" is Linux's internal ID. */ - write_pen_release(cpu); + spear_write_pen_release(cpu); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { smp_rmb(); - if (pen_release == -1) + if (spear_pen_release == -1) break; udelay(10); @@ -86,7 +91,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) */ spin_unlock(&boot_lock); - return pen_release != -1 ? -ENOSYS : 0; + return spear_pen_release != -1 ? -ENOSYS : 0; } /* diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 805f306fa6f7..e22ccf87eded 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -172,7 +172,7 @@ after_errata: mov32 r5, TEGRA_IRAM_BASE + TEGRA_IRAM_RESET_HANDLER_OFFSET mov r0, #CPU_NOT_RESETTABLE cmp r10, #0 - strneb r0, [r5, #__tegra20_cpu1_resettable_status_offset] + strbne r0, [r5, #__tegra20_cpu1_resettable_status_offset] 1: #endif diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 24659952c278..be68d62566c7 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -215,8 +215,8 @@ v6_dma_inv_range: #endif tst r1, #D_CACHE_LINE_SIZE - 1 #ifdef CONFIG_DMA_CACHE_RWFO - ldrneb r2, [r1, #-1] @ read for ownership - strneb r2, [r1, #-1] @ write for ownership + ldrbne r2, [r1, #-1] @ read for ownership + strbne r2, [r1, #-1] @ write for ownership #endif bic r1, r1, #D_CACHE_LINE_SIZE - 1 #ifdef HARVARD_CACHE @@ -284,8 +284,8 @@ ENTRY(v6_dma_flush_range) add r0, r0, #D_CACHE_LINE_SIZE cmp r0, r1 #ifdef CONFIG_DMA_CACHE_RWFO - ldrlob r2, [r0] @ read for ownership - strlob r2, [r0] @ write for ownership + ldrblo r2, [r0] @ read for ownership + strblo r2, [r0] @ write for ownership #endif blo 1b mov r0, #0 diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c index b03202cddddb..f74cdce6d4da 100644 --- a/arch/arm/mm/copypage-v4mc.c +++ b/arch/arm/mm/copypage-v4mc.c @@ -45,6 +45,7 @@ static void mc_copy_user_page(void *from, void *to) int tmp; asm volatile ("\ + .syntax unified\n\ ldmia %0!, {r2, r3, ip, lr} @ 4\n\ 1: mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\ stmia %1!, {r2, r3, ip, lr} @ 4\n\ @@ -56,7 +57,7 @@ static void mc_copy_user_page(void *from, void *to) ldmia %0!, {r2, r3, ip, lr} @ 4\n\ subs %2, %2, #1 @ 1\n\ stmia %1!, {r2, r3, ip, lr} @ 4\n\ - ldmneia %0!, {r2, r3, ip, lr} @ 4\n\ + ldmiane %0!, {r2, r3, ip, lr} @ 4\n\ bne 1b @ " : "+&r" (from), "+&r" (to), "=&r" (tmp) : "2" (PAGE_SIZE / 64) diff --git a/arch/arm/mm/copypage-v4wb.c b/arch/arm/mm/copypage-v4wb.c index 
cd3e165afeed..6d336740aae4 100644 --- a/arch/arm/mm/copypage-v4wb.c +++ b/arch/arm/mm/copypage-v4wb.c @@ -27,6 +27,7 @@ static void v4wb_copy_user_page(void *kto, const void *kfrom) int tmp; asm volatile ("\ + .syntax unified\n\ ldmia %1!, {r3, r4, ip, lr} @ 4\n\ 1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\ @@ -38,7 +39,7 @@ static void v4wb_copy_user_page(void *kto, const void *kfrom) ldmia %1!, {r3, r4, ip, lr} @ 4\n\ subs %2, %2, #1 @ 1\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\ - ldmneia %1!, {r3, r4, ip, lr} @ 4\n\ + ldmiane %1!, {r3, r4, ip, lr} @ 4\n\ bne 1b @ 1\n\ mcr p15, 0, %1, c7, c10, 4 @ 1 drain WB" : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp) diff --git a/arch/arm/mm/copypage-v4wt.c b/arch/arm/mm/copypage-v4wt.c index 8614572e1296..3851bb396442 100644 --- a/arch/arm/mm/copypage-v4wt.c +++ b/arch/arm/mm/copypage-v4wt.c @@ -25,6 +25,7 @@ static void v4wt_copy_user_page(void *kto, const void *kfrom) int tmp; asm volatile ("\ + .syntax unified\n\ ldmia %1!, {r3, r4, ip, lr} @ 4\n\ 1: stmia %0!, {r3, r4, ip, lr} @ 4\n\ ldmia %1!, {r3, r4, ip, lr} @ 4+1\n\ @@ -34,7 +35,7 @@ static void v4wt_copy_user_page(void *kto, const void *kfrom) ldmia %1!, {r3, r4, ip, lr} @ 4\n\ subs %2, %2, #1 @ 1\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\ - ldmneia %1!, {r3, r4, ip, lr} @ 4\n\ + ldmiane %1!, {r3, r4, ip, lr} @ 4\n\ bne 1b @ 1\n\ mcr p15, 0, %2, c7, c7, 0 @ flush ID cache" : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c6aab9c36ff1..43f46aa7ef33 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -2279,7 +2279,7 @@ EXPORT_SYMBOL_GPL(arm_iommu_attach_device); * @dev: valid struct device pointer * * Detaches the provided device from a previously attached map. - * This voids the dma operations (dma_map_ops pointer) + * This overwrites the dma_ops pointer with appropriate non-IOMMU ops. */ void arm_iommu_detach_device(struct device *dev) { diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index 1d1edd064199..a033f6134a64 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -6,6 +6,7 @@ #include <asm/cputype.h> #include <asm/idmap.h> +#include <asm/hwcap.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/sections.h> @@ -110,7 +111,8 @@ static int __init init_static_idmap(void) __idmap_text_end, 0); /* Flush L1 for the hardware to see this page table content */ - flush_cache_louis(); + if (!(elf_hwcap & HWCAP_LPAE)) + flush_cache_louis(); return 0; } diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 15dddfe43319..c2daabbe0af0 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -282,15 +282,12 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) void __init bootmem_init(void) { - unsigned long min, max_low, max_high; - memblock_allow_resize(); - max_low = max_high = 0; - find_limits(&min, &max_low, &max_high); + find_limits(&min_low_pfn, &max_low_pfn, &max_pfn); - early_memtest((phys_addr_t)min << PAGE_SHIFT, - (phys_addr_t)max_low << PAGE_SHIFT); + early_memtest((phys_addr_t)min_low_pfn << PAGE_SHIFT, + (phys_addr_t)max_low_pfn << PAGE_SHIFT); /* * Sparsemem tries to allocate bootmem in memory_present(), @@ -308,16 +305,7 @@ void __init bootmem_init(void) * the sparse mem_map arrays initialized by sparse_init() * for memmap_init_zone(), otherwise all PFNs are invalid. 
*/ - zone_sizes_init(min, max_low, max_high); - - /* - * This doesn't seem to be used by the Linux memory manager any - * more, but is used by ll_rw_block. If we can get rid of it, we - * also get rid of some of the stuff above as well. - */ - min_low_pfn = min; - max_low_pfn = max_low; - max_pfn = max_high; + zone_sizes_init(min_low_pfn, max_low_pfn, max_pfn); } /* @@ -498,55 +486,6 @@ void __init mem_init(void) mem_init_print_info(NULL); -#define MLK(b, t) b, t, ((t) - (b)) >> 10 -#define MLM(b, t) b, t, ((t) - (b)) >> 20 -#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K) - - pr_notice("Virtual kernel memory layout:\n" - " vector : 0x%08lx - 0x%08lx (%4ld kB)\n" -#ifdef CONFIG_HAVE_TCM - " DTCM : 0x%08lx - 0x%08lx (%4ld kB)\n" - " ITCM : 0x%08lx - 0x%08lx (%4ld kB)\n" -#endif - " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" - " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" - " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" -#ifdef CONFIG_HIGHMEM - " pkmap : 0x%08lx - 0x%08lx (%4ld MB)\n" -#endif -#ifdef CONFIG_MODULES - " modules : 0x%08lx - 0x%08lx (%4ld MB)\n" -#endif - " .text : 0x%p" " - 0x%p" " (%4td kB)\n" - " .init : 0x%p" " - 0x%p" " (%4td kB)\n" - " .data : 0x%p" " - 0x%p" " (%4td kB)\n" - " .bss : 0x%p" " - 0x%p" " (%4td kB)\n", - - MLK(VECTORS_BASE, VECTORS_BASE + PAGE_SIZE), -#ifdef CONFIG_HAVE_TCM - MLK(DTCM_OFFSET, (unsigned long) dtcm_end), - MLK(ITCM_OFFSET, (unsigned long) itcm_end), -#endif - MLK(FIXADDR_START, FIXADDR_END), - MLM(VMALLOC_START, VMALLOC_END), - MLM(PAGE_OFFSET, (unsigned long)high_memory), -#ifdef CONFIG_HIGHMEM - MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) * - (PAGE_SIZE)), -#endif -#ifdef CONFIG_MODULES - MLM(MODULES_VADDR, MODULES_END), -#endif - - MLK_ROUNDUP(_text, _etext), - MLK_ROUNDUP(__init_begin, __init_end), - MLK_ROUNDUP(_sdata, _edata), - MLK_ROUNDUP(__bss_start, __bss_stop)); - -#undef MLK -#undef MLM -#undef MLK_ROUNDUP - /* * Check boundaries twice: Some fundamental inconsistencies can * be detected at build time already. 
diff --git a/arch/arm/mm/pmsa-v8.c b/arch/arm/mm/pmsa-v8.c index 617a83def88a..0d7d5fb59247 100644 --- a/arch/arm/mm/pmsa-v8.c +++ b/arch/arm/mm/pmsa-v8.c @@ -165,7 +165,7 @@ static int __init pmsav8_setup_ram(unsigned int number, phys_addr_t start,phys_a return -EINVAL; bar = start; - lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);; + lar = (end - 1) & ~(PMSAv8_MINALIGN - 1); bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED; lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN; @@ -181,7 +181,7 @@ static int __init pmsav8_setup_io(unsigned int number, phys_addr_t start,phys_ad return -EINVAL; bar = start; - lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);; + lar = (end - 1) & ~(PMSAv8_MINALIGN - 1); bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN; lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN; diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 47a5acc64433..acd5a66dfc23 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -139,6 +139,9 @@ __v7m_setup_cont: cpsie i svc #0 1: cpsid i + ldr r0, =exc_ret + orr lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK + str lr, [r0] ldmia sp, {r0-r3, r12} str r5, [r12, #11 * 4] @ restore the original SVC vector entry mov lr, r6 @ restore LR @@ -149,10 +152,10 @@ __v7m_setup_cont: @ Configure caches (if implemented) teq r8, #0 - stmneia sp, {r0-r6, lr} @ v7m_invalidate_l1 touches r0-r6 + stmiane sp, {r0-r6, lr} @ v7m_invalidate_l1 touches r0-r6 blne v7m_invalidate_l1 teq r8, #0 @ re-evalutae condition - ldmneia sp, {r0-r6, lr} + ldmiane sp, {r0-r6, lr} @ Configure the System Control Register to ensure 8-byte stack alignment @ Note the STKALIGN bit is either RW or RAO. diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 506386a3edde..d3842791e1c4 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -77,6 +77,10 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) */ if (!vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 |= HCR_TID3; + + if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || + vcpu_el1_is_32bit(vcpu)) + vcpu->arch.hcr_el2 |= HCR_TID2; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) @@ -331,6 +335,14 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) return ESR_ELx_SYS64_ISS_RT(esr); } +static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_trap_is_iabt(vcpu)) + return false; + + return kvm_vcpu_dabt_iswrite(vcpu); +} + static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) { return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 222af1d2c3e4..a01fe087e022 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -31,6 +31,7 @@ #include <asm/kvm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> +#include <asm/smp_plat.h> #include <asm/thread_info.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -58,16 +59,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); -struct kvm_arch { +struct kvm_vmid { /* The VMID generation used for the virt. 
memory system */ u64 vmid_gen; u32 vmid; +}; + +struct kvm_arch { + struct kvm_vmid vmid; /* stage2 entry level table */ pgd_t *pgd; + phys_addr_t pgd_phys; - /* VTTBR value associated with above pgd and vmid */ - u64 vttbr; /* VTCR_EL2 value for this VM */ u64 vtcr; @@ -382,7 +386,36 @@ void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); u64 __kvm_call_hyp(void *hypfn, ...); -#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) + +/* + * The couple of isb() below are there to guarantee the same behaviour + * on VHE as on !VHE, where the eret to EL1 acts as a context + * synchronization event. + */ +#define kvm_call_hyp(f, ...) \ + do { \ + if (has_vhe()) { \ + f(__VA_ARGS__); \ + isb(); \ + } else { \ + __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__); \ + } \ + } while(0) + +#define kvm_call_hyp_ret(f, ...) \ + ({ \ + typeof(f(__VA_ARGS__)) ret; \ + \ + if (has_vhe()) { \ + ret = f(__VA_ARGS__); \ + isb(); \ + } else { \ + ret = __kvm_call_hyp(kvm_ksym_ref(f), \ + ##__VA_ARGS__); \ + } \ + \ + ret; \ + }) void force_vm_exit(const cpumask_t *mask); void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); @@ -401,6 +434,13 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state); +static inline void kvm_init_host_cpu_context(kvm_cpu_context_t *cpu_ctxt, + int cpu) +{ + /* The host's MPIDR is immutable, so let's set it up at boot time */ + cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu); +} + void __kvm_enable_ssbs(void); static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index a80a7ef57325..4da765f2cca5 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -21,6 +21,7 @@ #include <linux/compiler.h> #include <linux/kvm_host.h> #include <asm/alternative.h> +#include <asm/kvm_mmu.h> #include <asm/sysreg.h> #define __hyp_text __section(.hyp.text) notrace @@ -163,7 +164,7 @@ void __noreturn __hyp_do_panic(unsigned long, ...); static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) { write_sysreg(kvm->arch.vtcr, vtcr_el2); - write_sysreg(kvm->arch.vttbr, vttbr_el2); + write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); /* * ARM erratum 1165522 requires the actual execution of the above diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 8af4b1befa42..b0742a16c6c9 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -138,7 +138,8 @@ static inline unsigned long __kern_hyp_va(unsigned long v) }) /* - * We currently only support a 40bit IPA. + * We currently support using a VM-specified IPA size. For backward + * compatibility, the default IPA size is fixed to 40bits. */ #define KVM_PHYS_SHIFT (40) @@ -591,9 +592,15 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); } -static inline bool kvm_cpu_has_cnp(void) +static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) { - return system_supports_cnp(); + struct kvm_vmid *vmid = &kvm->arch.vmid; + u64 vmid_field, baddr; + u64 cnp = system_supports_cnp() ? 
VTTBR_CNP_BIT : 0; + + baddr = kvm->arch.pgd_phys; + vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT; + return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 72dc4c011014..5b267dec6194 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -361,6 +361,7 @@ #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) +#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) #define SYS_CLIDR_EL1 sys_reg(3, 1, 0, 0, 1) #define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7) @@ -392,6 +393,10 @@ #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) +#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0) +#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1) +#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0) + #define __PMEV_op2(n) ((n) & 0x7) #define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3)) #define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n)) @@ -426,7 +431,7 @@ #define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) #define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) #define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) -#define SYS_ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) #define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) #define __SYS__LR0_EL2(x) sys_reg(3, 4, 12, 12, x) diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 0f2a135ba15b..690e033a91c0 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -3,9 +3,7 @@ # Makefile for Kernel-based Virtual Machine module # -ccflags-y += -Iarch/arm64/kvm -Ivirt/kvm/arm/vgic -CFLAGS_arm.o := -I. -CFLAGS_mmu.o := -I. +ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic KVM=../../../virt/kvm diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index f39801e4136c..fd917d6d12af 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -76,7 +76,7 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) void kvm_arm_init_debug(void) { - __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2)); + __this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2)); } /** diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 952f6cb9cf72..2845aa680841 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -40,9 +40,6 @@ * arch/arm64/kernel/hyp_stub.S. */ ENTRY(__kvm_call_hyp) -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN hvc #0 ret -alternative_else_nop_endif - b __vhe_hyp_call ENDPROC(__kvm_call_hyp) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 73c1b483ec39..2b1e686772bf 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -43,18 +43,6 @@ ldr lr, [sp], #16 .endm -ENTRY(__vhe_hyp_call) - do_el2_call - /* - * We used to rely on having an exception return to get - * an implicit isb. In the E2H case, we don't have it anymore. - * rather than changing all the leaf functions, just do it here - * before returning to the rest of the kernel. 
- */ - isb - ret -ENDPROC(__vhe_hyp_call) - el1_sync: // Guest trapped into EL2 mrs x0, esr_el2 diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index b426e2cf973c..c52a8451637c 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -53,7 +53,6 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt) static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) { - ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr); ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c936aa40c3f4..539feecda5b8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -982,6 +982,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } +#define reg_to_encoding(x) \ + sys_reg((u32)(x)->Op0, (u32)(x)->Op1, \ + (u32)(x)->CRn, (u32)(x)->CRm, (u32)(x)->Op2); + /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ @@ -1003,44 +1007,38 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } -static bool access_cntp_tval(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool access_arch_timer(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) { - u64 now = kvm_phys_timer_read(); - u64 cval; + enum kvm_arch_timers tmr; + enum kvm_arch_timer_regs treg; + u64 reg = reg_to_encoding(r); - if (p->is_write) { - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, - p->regval + now); - } else { - cval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); - p->regval = cval - now; + switch (reg) { + case SYS_CNTP_TVAL_EL0: + case SYS_AARCH32_CNTP_TVAL: + tmr = TIMER_PTIMER; + treg = TIMER_REG_TVAL; + break; + case SYS_CNTP_CTL_EL0: + case SYS_AARCH32_CNTP_CTL: + tmr = TIMER_PTIMER; + treg = TIMER_REG_CTL; + break; + case SYS_CNTP_CVAL_EL0: + case SYS_AARCH32_CNTP_CVAL: + tmr = TIMER_PTIMER; + treg = TIMER_REG_CVAL; + break; + default: + BUG(); } - return true; -} - -static bool access_cntp_ctl(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (p->is_write) - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, p->regval); - else - p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL); - - return true; -} - -static bool access_cntp_cval(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ if (p->is_write) - kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, p->regval); + kvm_arm_timer_write_sysreg(vcpu, tmr, treg, p->regval); else - p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL); + p->regval = kvm_arm_timer_read_sysreg(vcpu, tmr, treg); return true; } @@ -1160,6 +1158,64 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return __set_id_reg(rd, uaddr, true); } +static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p, r); + + p->regval = read_sanitised_ftr_reg(SYS_CTR_EL0); + return true; +} + +static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const 
struct sys_reg_desc *r) +{ + if (p->is_write) + return write_to_read_only(vcpu, p, r); + + p->regval = read_sysreg(clidr_el1); + return true; +} + +static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) + vcpu_write_sys_reg(vcpu, p->regval, r->reg); + else + p->regval = vcpu_read_sys_reg(vcpu, r->reg); + return true; +} + +static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 csselr; + + if (p->is_write) + return write_to_read_only(vcpu, p, r); + + csselr = vcpu_read_sys_reg(vcpu, CSSELR_EL1); + p->regval = get_ccsidr(csselr); + + /* + * Guests should not be doing cache operations by set/way at all, and + * for this reason, we trap them and attempt to infer the intent, so + * that we can flush the entire guest's address space at the appropriate + * time. + * To prevent this trapping from causing performance problems, let's + * expose the geometry of all data and unified caches (which are + * guaranteed to be PIPT and thus non-aliasing) as 1 set and 1 way. + * [If guests should attempt to infer aliasing properties from the + * geometry (which is not permitted by the architecture), they would + * only do so for virtually indexed caches.] + */ + if (!(csselr & 1)) // data or unified cache + p->regval &= ~GENMASK(27, 3); + return true; +} + /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ @@ -1377,7 +1433,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, - { SYS_DESC(SYS_CSSELR_EL1), NULL, reset_unknown, CSSELR_EL1 }, + { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, + { SYS_DESC(SYS_CLIDR_EL1), access_clidr }, + { SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 }, + { SYS_DESC(SYS_CTR_EL0), access_ctr }, { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, @@ -1400,9 +1459,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, - { SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval }, - { SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl }, - { SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval }, + { SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer }, + { SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer }, /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -1476,7 +1535,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, - { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x70 }, + { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x700 }, }; static bool trap_dbgidr(struct kvm_vcpu *vcpu, @@ -1677,6 +1736,7 @@ static const struct sys_reg_desc cp14_64_regs[] = { * register). 
*/ static const struct sys_reg_desc cp15_regs[] = { + { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, @@ -1723,10 +1783,9 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, - /* CNTP_TVAL */ - { Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval }, - /* CNTP_CTL */ - { Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl }, + /* Arch Tmers */ + { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer }, + { SYS_DESC(SYS_AARCH32_CNTP_CTL), access_arch_timer }, /* PMEVCNTRn */ PMU_PMEVCNTR(0), @@ -1794,6 +1853,10 @@ static const struct sys_reg_desc cp15_regs[] = { PMU_PMEVTYPER(30), /* PMCCFILTR */ { Op1(0), CRn(14), CRm(15), Op2(7), access_pmu_evtyper }, + + { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr }, + { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr }, + { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR }, }; static const struct sys_reg_desc cp15_64_regs[] = { @@ -1803,7 +1866,7 @@ static const struct sys_reg_desc cp15_64_regs[] = { { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ - { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval }, + { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, }; /* Target specific emulation tables */ @@ -1832,30 +1895,19 @@ static const struct sys_reg_desc *get_target_table(unsigned target, } } -#define reg_to_match_value(x) \ - ({ \ - unsigned long val; \ - val = (x)->Op0 << 14; \ - val |= (x)->Op1 << 11; \ - val |= (x)->CRn << 7; \ - val |= (x)->CRm << 3; \ - val |= (x)->Op2; \ - val; \ - }) - static int match_sys_reg(const void *key, const void *elt) { const unsigned long pval = (unsigned long)key; const struct sys_reg_desc *r = elt; - return pval - reg_to_match_value(r); + return pval - reg_to_encoding(r); } static const struct sys_reg_desc *find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[], unsigned int num) { - unsigned long pval = reg_to_match_value(params); + unsigned long pval = reg_to_encoding(params); return bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); } @@ -2218,11 +2270,15 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, } FUNCTION_INVARIANT(midr_el1) -FUNCTION_INVARIANT(ctr_el0) FUNCTION_INVARIANT(revidr_el1) FUNCTION_INVARIANT(clidr_el1) FUNCTION_INVARIANT(aidr_el1) +static void get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r) +{ + ((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0); +} + /* ->val is filled in by kvm_sys_reg_table_init() */ static struct sys_reg_desc invariant_sys_regs[] = { { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index d2abd98471e8..41204a49cf95 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -1134,7 +1134,7 @@ static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm, 
struct kvm_memslots *slots) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0f98f00da2ea..e6b5bb012ccb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -99,6 +99,8 @@ struct kvm_nested_guest; struct kvm_vm_stat { ulong remote_tlb_flush; + ulong num_2M_pages; + ulong num_1G_pages; }; struct kvm_vcpu_stat { @@ -377,6 +379,7 @@ struct kvmppc_mmu { void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs); u64 (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr); u64 (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr); + int (*slbfee)(struct kvm_vcpu *vcpu, gva_t eaddr, ulong *ret_slb); void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr); void (*slbia)(struct kvm_vcpu *vcpu); /* book3s */ @@ -837,7 +840,7 @@ struct kvm_vcpu_arch { static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a6c8548ed9fa..ac22b28ae78d 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -36,6 +36,8 @@ #endif #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #include <asm/paca.h> +#include <asm/xive.h> +#include <asm/cpu_has_feature.h> #endif /* @@ -617,6 +619,18 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { } #endif /* CONFIG_KVM_XIVE */ +#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER) +static inline bool xics_on_xive(void) +{ + return xive_enabled() && cpu_has_feature(CPU_FTR_HVMODE); +} +#else +static inline bool xics_on_xive(void) +{ + return false; +} +#endif + /* * Prototypes for functions called only from assembler code. * Having prototypes reduces sparse errors. 
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 8c876c166ef2..26ca425f4c2c 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -463,10 +463,12 @@ struct kvm_ppc_cpu_char { #define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) #define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) #define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) +#define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) #define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) #define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) +#define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 9a7dadbe1f17..10c5579d20ce 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -39,6 +39,7 @@ #include "book3s.h" #include "trace.h" +#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU /* #define EXIT_DEBUG */ @@ -71,6 +72,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "pthru_all", VCPU_STAT(pthru_all) }, { "pthru_host", VCPU_STAT(pthru_host) }, { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, + { "largepages_2M", VM_STAT(num_2M_pages) }, + { "largepages_1G", VM_STAT(num_1G_pages) }, { NULL } }; @@ -642,7 +645,7 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, r = -ENXIO; break; } - if (xive_enabled()) + if (xics_on_xive()) *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu)); else *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu)); @@ -715,7 +718,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, r = -ENXIO; break; } - if (xive_enabled()) + if (xics_on_xive()) r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val)); else r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val)); @@ -991,7 +994,7 @@ int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status) { - if (xive_enabled()) + if (xics_on_xive()) return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level, line_status); else @@ -1044,7 +1047,7 @@ static int kvmppc_book3s_init(void) #ifdef CONFIG_KVM_XICS #ifdef CONFIG_KVM_XIVE - if (xive_enabled()) { + if (xics_on_xive()) { kvmppc_xive_init_module(); kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); } else @@ -1057,7 +1060,7 @@ static int kvmppc_book3s_init(void) static void kvmppc_book3s_exit(void) { #ifdef CONFIG_KVM_XICS - if (xive_enabled()) + if (xics_on_xive()) kvmppc_xive_exit_module(); #endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 612169988a3d..6f789f674048 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -425,6 +425,7 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu) mmu->slbmte = NULL; mmu->slbmfee = NULL; mmu->slbmfev = NULL; + mmu->slbfee = NULL; mmu->slbie = NULL; mmu->slbia = NULL; } diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index c92dd25bed23..d4b967f0e8d4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -435,6 +435,19 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb) kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT); } +static int 
kvmppc_mmu_book3s_64_slbfee(struct kvm_vcpu *vcpu, gva_t eaddr, + ulong *ret_slb) +{ + struct kvmppc_slb *slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr); + + if (slbe) { + *ret_slb = slbe->origv; + return 0; + } + *ret_slb = 0; + return -ENOENT; +} + static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr) { struct kvmppc_slb *slbe; @@ -670,6 +683,7 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu) mmu->slbmte = kvmppc_mmu_book3s_64_slbmte; mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee; mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev; + mmu->slbfee = kvmppc_mmu_book3s_64_slbfee; mmu->slbie = kvmppc_mmu_book3s_64_slbie; mmu->slbia = kvmppc_mmu_book3s_64_slbia; mmu->xlate = kvmppc_mmu_book3s_64_xlate; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index bd2dcfbf00cd..be7bc070eae5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -442,6 +442,24 @@ int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 last_inst; /* + * Fast path - check if the guest physical address corresponds to a + * device on the FAST_MMIO_BUS, if so we can avoid loading the + * instruction all together, then we can just handle it and return. + */ + if (is_store) { + int idx, ret; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0, + NULL); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (!ret) { + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + return RESUME_GUEST; + } + } + + /* * If we fail, we just return to the guest and try executing it again. */ if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 1b821c6efdef..f55ef071883f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -403,8 +403,13 @@ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, if (!memslot) return; } - if (shift) + if (shift) { /* 1GB or 2MB page */ page_size = 1ul << shift; + if (shift == PMD_SHIFT) + kvm->stat.num_2M_pages--; + else if (shift == PUD_SHIFT) + kvm->stat.num_1G_pages--; + } gpa &= ~(page_size - 1); hpa = old & PTE_RPN_MASK; @@ -878,6 +883,14 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, put_page(page); } + /* Increment number of large pages if we (successfully) inserted one */ + if (!ret) { + if (level == 1) + kvm->stat.num_2M_pages++; + else if (level == 2) + kvm->stat.num_1G_pages++; + } + return ret; } diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 532ab79734c7..f02b04973710 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -133,7 +133,6 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, continue; kref_put(&stit->kref, kvm_spapr_tce_liobn_put); - return; } } } @@ -338,14 +337,15 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, } } + kvm_get_kvm(kvm); if (!ret) ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops, stt, O_RDWR | O_CLOEXEC); - if (ret >= 0) { + if (ret >= 0) list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables); - kvm_get_kvm(kvm); - } + else + kvm_put_kvm(kvm); mutex_unlock(&kvm->lock); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 8c7e933e942e..6ef7c5f00a49 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -47,6 +47,7 @@ #define OP_31_XOP_SLBMFEV 851 #define 
OP_31_XOP_EIOIO 854 #define OP_31_XOP_SLBMFEE 915 +#define OP_31_XOP_SLBFEE 979 #define OP_31_XOP_TBEGIN 654 #define OP_31_XOP_TABORT 910 @@ -416,6 +417,23 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.mmu.slbia(vcpu); break; + case OP_31_XOP_SLBFEE: + if (!(inst & 1) || !vcpu->arch.mmu.slbfee) { + return EMULATE_FAIL; + } else { + ulong b, t; + ulong cr = kvmppc_get_cr(vcpu) & ~CR0_MASK; + + b = kvmppc_get_gpr(vcpu, rb); + if (!vcpu->arch.mmu.slbfee(vcpu, b, &t)) + cr |= 2 << CR0_SHIFT; + kvmppc_set_gpr(vcpu, rt, t); + /* copy XER[SO] bit to CR0[SO] */ + cr |= (vcpu->arch.regs.xer & 0x80000000) >> + (31 - CR0_SHIFT); + kvmppc_set_cr(vcpu, cr); + } + break; case OP_31_XOP_SLBMFEE: if (!vcpu->arch.mmu.slbmfee) { emulated = EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a3d5318f5d1e..06964350b97a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -922,7 +922,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_IPOLL: case H_XIRR_X: if (kvmppc_xics_enabled(vcpu)) { - if (xive_enabled()) { + if (xics_on_xive()) { ret = H_NOT_AVAILABLE; return RESUME_GUEST; } @@ -937,6 +937,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5)); break; +#ifdef CONFIG_SPAPR_TCE_IOMMU case H_GET_TCE: ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5)); @@ -966,6 +967,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (ret == H_TOO_HARD) return RESUME_HOST; break; +#endif case H_RANDOM: if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) ret = H_HARDWARE; @@ -1445,7 +1447,7 @@ static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) case BOOK3S_INTERRUPT_HV_RM_HARD: vcpu->arch.trap = 0; r = RESUME_GUEST; - if (!xive_enabled()) + if (!xics_on_xive()) kvmppc_xics_rm_complete(vcpu, 0); break; default: @@ -3648,11 +3650,12 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc, static void grow_halt_poll_ns(struct kvmppc_vcore *vc) { - /* 10us base */ - if (vc->halt_poll_ns == 0 && halt_poll_ns_grow) - vc->halt_poll_ns = 10000; - else - vc->halt_poll_ns *= halt_poll_ns_grow; + if (!halt_poll_ns_grow) + return; + + vc->halt_poll_ns *= halt_poll_ns_grow; + if (vc->halt_poll_ns < halt_poll_ns_grow_start) + vc->halt_poll_ns = halt_poll_ns_grow_start; } static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) @@ -3666,7 +3669,7 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) #ifdef CONFIG_KVM_XICS static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu) { - if (!xive_enabled()) + if (!xics_on_xive()) return false; return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr < vcpu->arch.xive_saved_state.cppr; @@ -4226,7 +4229,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); srcu_read_unlock(&kvm->srcu, srcu_idx); } else if (r == RESUME_PASSTHROUGH) { - if (WARN_ON(xive_enabled())) + if (WARN_ON(xics_on_xive())) r = H_SUCCESS; else r = kvmppc_xics_rm_complete(vcpu, 0); @@ -4750,7 +4753,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) * If xive is enabled, we route 0x500 interrupts directly * to the guest. 
*/ - if (xive_enabled()) + if (xics_on_xive()) lpcr |= LPCR_LPES; } @@ -4986,7 +4989,7 @@ static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) if (i == pimap->n_mapped) pimap->n_mapped++; - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc); else kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq); @@ -5027,7 +5030,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) return -ENODEV; } - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc); else kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq); @@ -5359,13 +5362,11 @@ static int kvm_init_subcore_bitmap(void) continue; sibling_subcore_state = - kmalloc_node(sizeof(struct sibling_subcore_state), + kzalloc_node(sizeof(struct sibling_subcore_state), GFP_KERNEL, node); if (!sibling_subcore_state) return -ENOMEM; - memset(sibling_subcore_state, 0, - sizeof(struct sibling_subcore_state)); for (j = 0; j < threads_per_core; j++) { int cpu = first_cpu + j; @@ -5406,7 +5407,7 @@ static int kvmppc_book3s_init_hv(void) * indirectly, via OPAL. */ #ifdef CONFIG_SMP - if (!xive_enabled() && !kvmhv_on_pseries() && + if (!xics_on_xive() && !kvmhv_on_pseries() && !local_paca->kvm_hstate.xics_phys) { struct device_node *np; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index a71e2fc00a4e..b0cf22477e87 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -257,7 +257,7 @@ void kvmhv_rm_send_ipi(int cpu) } /* We should never reach this */ - if (WARN_ON_ONCE(xive_enabled())) + if (WARN_ON_ONCE(xics_on_xive())) return; /* Else poke the target with an IPI */ @@ -577,7 +577,7 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_xirr(vcpu); if (unlikely(!__xive_vm_h_xirr)) @@ -592,7 +592,7 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu) if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; vcpu->arch.regs.gpr[5] = get_tb(); - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_xirr(vcpu); if (unlikely(!__xive_vm_h_xirr)) @@ -606,7 +606,7 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_ipoll(vcpu, server); if (unlikely(!__xive_vm_h_ipoll)) @@ -621,7 +621,7 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_ipi(vcpu, server, mfrr); if (unlikely(!__xive_vm_h_ipi)) @@ -635,7 +635,7 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_cppr(vcpu, cppr); if (unlikely(!__xive_vm_h_cppr)) @@ -649,7 +649,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) { if (!kvmppc_xics_enabled(vcpu)) return H_TOO_HARD; - if (xive_enabled()) { + if (xics_on_xive()) { if (is_rm()) return xive_rm_h_eoi(vcpu, xirr); if (unlikely(!__xive_vm_h_eoi)) diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index b3f5786b20dc..3b9662a4207e 100644 --- 
a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -144,6 +144,13 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu, return; } + if (xive_enabled() && kvmhv_on_pseries()) { + /* No XICS access or hypercalls available, too hard */ + this_icp->rm_action |= XICS_RM_KICK_VCPU; + this_icp->rm_kick_target = vcpu; + return; + } + /* * Check if the core is loaded, * if not, find an available host core to post to wake the VCPU, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 25043b50cb30..3a5e719ef032 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2272,8 +2272,13 @@ hcall_real_table: .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table .long DOTSYM(kvmppc_h_protect) - hcall_real_table +#ifdef CONFIG_SPAPR_TCE_IOMMU .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table .long DOTSYM(kvmppc_rm_h_put_tce) - hcall_real_table +#else + .long 0 /* 0x1c */ + .long 0 /* 0x20 */ +#endif .long 0 /* 0x24 - H_SET_SPRG0 */ .long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table .long 0 /* 0x2c */ @@ -2351,8 +2356,13 @@ hcall_real_table: .long 0 /* 0x12c */ .long 0 /* 0x130 */ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table +#ifdef CONFIG_SPAPR_TCE_IOMMU .long DOTSYM(kvmppc_rm_h_stuff_tce) - hcall_real_table .long DOTSYM(kvmppc_rm_h_put_tce_indirect) - hcall_real_table +#else + .long 0 /* 0x138 */ + .long 0 /* 0x13c */ +#endif .long 0 /* 0x140 */ .long 0 /* 0x144 */ .long 0 /* 0x148 */ diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 2d3b2b1cc272..4e178c4c1ea5 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -33,7 +33,7 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) server = be32_to_cpu(args->args[1]); priority = be32_to_cpu(args->args[2]); - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority); else rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); @@ -56,7 +56,7 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) irq = be32_to_cpu(args->args[0]); server = priority = 0; - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority); else rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); @@ -83,7 +83,7 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) irq = be32_to_cpu(args->args[0]); - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_int_off(vcpu->kvm, irq); else rc = kvmppc_xics_int_off(vcpu->kvm, irq); @@ -105,7 +105,7 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) irq = be32_to_cpu(args->args[0]); - if (xive_enabled()) + if (xics_on_xive()) rc = kvmppc_xive_int_on(vcpu->kvm, irq); else rc = kvmppc_xics_int_on(vcpu->kvm, irq); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b90a7d154180..8885377ec3e0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -748,7 +748,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); break; case KVMPPC_IRQ_XICS: - if (xive_enabled()) + if (xics_on_xive()) kvmppc_xive_cleanup_vcpu(vcpu); else kvmppc_xics_free_icp(vcpu); @@ -1931,7 +1931,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = -EPERM; dev = kvm_device_from_filp(f.file); if (dev) { - if 
(xive_enabled()) + if (xics_on_xive()) r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]); else r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]); @@ -2189,10 +2189,12 @@ static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | - KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS | + KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST; cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | - KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR | + KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE; } return 0; } @@ -2251,12 +2253,16 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) if (have_fw_feat(fw_features, "enabled", "fw-count-cache-disabled")) cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + if (have_fw_feat(fw_features, "enabled", + "fw-count-cache-flush-bcctr2,0,0")) + cp->character |= KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST; cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | - KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS | + KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST; if (have_fw_feat(fw_features, "enabled", "speculation-policy-favor-security")) @@ -2267,9 +2273,13 @@ static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) if (!have_fw_feat(fw_features, "disabled", "needs-spec-barrier-for-bound-checks")) cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + if (have_fw_feat(fw_features, "enabled", + "needs-count-cache-flush-on-context-switch")) + cp->behaviour |= KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE; cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | - KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR | + KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE; of_node_put(fw_features); } diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 225667652069..1727180e8ca1 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -331,5 +331,6 @@ extern void css_schedule_reprobe(void); /* Function from drivers/s390/cio/chsc.c */ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); +int chsc_sgib(u32 origin); #endif diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 2f7f27e5493f..afaf5e3c57fd 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -62,6 +62,7 @@ enum interruption_class { IRQIO_MSI, IRQIO_VIR, IRQIO_VAI, + IRQIO_GAL, NMI_NMI, CPU_RST, NR_ARCH_IRQS diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h index 6cb9e2ed05b6..b2cc1ec78d06 100644 --- a/arch/s390/include/asm/isc.h +++ b/arch/s390/include/asm/isc.h @@ -21,6 +21,7 @@ /* Adapter interrupts. 
*/ #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ #define PCI_ISC 2 /* PCI I/O subchannels */ +#define GAL_ISC 5 /* GIB alert */ #define AP_ISC 6 /* adjunct processor (crypto) devices */ /* Functions for registration of I/O interruption subclasses */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d5d24889c3bc..c47e22bba87f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -591,7 +591,6 @@ struct kvm_s390_float_interrupt { struct kvm_s390_mchk_info mchk; struct kvm_s390_ext_info srv_signal; int next_rr_cpu; - unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; struct mutex ais_lock; u8 simm; u8 nimm; @@ -712,6 +711,7 @@ struct s390_io_adapter { struct kvm_s390_cpu_model { /* facility mask supported by kvm & hosting machine */ __u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64]; + struct kvm_s390_vm_cpu_subfunc subfuncs; /* facility list requested by guest (in dma page) */ __u64 *fac_list; u64 cpuid; @@ -782,9 +782,21 @@ struct kvm_s390_gisa { u8 reserved03[11]; u32 airq_count; } g1; + struct { + u64 word[4]; + } u64; }; }; +struct kvm_s390_gib { + u32 alert_list_origin; + u32 reserved01; + u8:5; + u8 nisc:3; + u8 reserved03[3]; + u32 reserved04[5]; +}; + /* * sie_page2 has to be allocated as DMA because fac_list, crycb and * gisa need 31bit addresses in the sie control block. @@ -793,7 +805,8 @@ struct sie_page2 { __u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */ struct kvm_s390_crypto_cb crycb; /* 0x0800 */ struct kvm_s390_gisa gisa; /* 0x0900 */ - u8 reserved920[0x1000 - 0x920]; /* 0x0920 */ + struct kvm *kvm; /* 0x0920 */ + u8 reserved928[0x1000 - 0x928]; /* 0x0928 */ }; struct kvm_s390_vsie { @@ -804,6 +817,20 @@ struct kvm_s390_vsie { struct page *pages[KVM_MAX_VCPUS]; }; +struct kvm_s390_gisa_iam { + u8 mask; + spinlock_t ref_lock; + u32 ref_count[MAX_ISC + 1]; +}; + +struct kvm_s390_gisa_interrupt { + struct kvm_s390_gisa *origin; + struct kvm_s390_gisa_iam alert; + struct hrtimer timer; + u64 expires; + DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS); +}; + struct kvm_arch{ void *sca; int use_esca; @@ -837,7 +864,8 @@ struct kvm_arch{ atomic64_t cmma_dirty_pages; /* subset of available cpu features enabled by user space */ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); - struct kvm_s390_gisa *gisa; + DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS); + struct kvm_s390_gisa_interrupt gisa_int; }; #define KVM_HVA_ERR_BAD (-1UL) @@ -871,6 +899,9 @@ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; +extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); +extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); + static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} @@ -878,7 +909,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} -static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} +static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) 
{} diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 0e8d68bac82c..0cd5a5f96729 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -88,6 +88,7 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" }, {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"}, + {.irq = IRQIO_GAL, .name = "GAL", .desc = "[I/O] GIB Alert"}, {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, }; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index fcb55b02990e..82162867f378 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -7,6 +7,9 @@ * Author(s): Carsten Otte <cotte@de.ibm.com> */ +#define KMSG_COMPONENT "kvm-s390" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/interrupt.h> #include <linux/kvm_host.h> #include <linux/hrtimer.h> @@ -23,6 +26,7 @@ #include <asm/gmap.h> #include <asm/switch_to.h> #include <asm/nmi.h> +#include <asm/airq.h> #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" @@ -31,6 +35,8 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 +static struct kvm_s390_gib *gib; + /* handle external calls via sigp interpretation facility */ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { @@ -217,22 +223,100 @@ static inline u8 int_word_to_isc(u32 int_word) */ #define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE) -static inline void kvm_s390_gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) +/** + * gisa_set_iam - change the GISA interruption alert mask + * + * @gisa: gisa to operate on + * @iam: new IAM value to use + * + * Change the IAM atomically with the next alert address and the IPM + * of the GISA if the GISA is not part of the GIB alert list. All three + * fields are located in the first long word of the GISA. + * + * Returns: 0 on success + * -EBUSY in case the gisa is part of the alert list + */ +static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam) +{ + u64 word, _word; + + do { + word = READ_ONCE(gisa->u64.word[0]); + if ((u64)gisa != word >> 32) + return -EBUSY; + _word = (word & ~0xffUL) | iam; + } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); + + return 0; +} + +/** + * gisa_clear_ipm - clear the GISA interruption pending mask + * + * @gisa: gisa to operate on + * + * Clear the IPM atomically with the next alert address and the IAM + * of the GISA unconditionally. All three fields are located in the + * first long word of the GISA. + */ +static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa) +{ + u64 word, _word; + + do { + word = READ_ONCE(gisa->u64.word[0]); + _word = word & ~(0xffUL << 24); + } while (cmpxchg(&gisa->u64.word[0], word, _word) != word); +} + +/** + * gisa_get_ipm_or_restore_iam - return IPM or restore GISA IAM + * + * @gi: gisa interrupt struct to work on + * + * Atomically restores the interruption alert mask if none of the + * relevant ISCs are pending and return the IPM. 
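The gisa_set_iam() and gisa_clear_ipm() helpers above both rely on the next-alert address, the IPM and the IAM sharing the first 64-bit word of the GISA, so a single compare-and-swap can change one field while atomically checking or preserving the others (gisa_get_ipm_or_restore_iam(), whose description continues below, uses the same trick). A compilable user-space model of gisa_set_iam(), with C11 atomics and a made-up OWN_ADDR token standing in for the GISA's real address:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Model of the first GISA word as used above: bits 63-32 next-alert
 * address, bits 31-24 IPM, bits 7-0 IAM.  OWN_ADDR is a stand-in for the
 * GISA's own address; the kernel compares against the real block address.
 */
#define OWN_ADDR	0x12345000ULL

static _Atomic uint64_t word0;

static int gisa_set_iam(uint8_t iam)
{
	uint64_t word, new;

	do {
		word = atomic_load(&word0);
		if ((word >> 32) != OWN_ADDR)
			return -1;	/* -EBUSY: queued on the GIB alert list */
		new = (word & ~0xffULL) | iam;
	} while (!atomic_compare_exchange_weak(&word0, &word, new));

	return 0;
}

int main(void)
{
	atomic_init(&word0, OWN_ADDR << 32);	/* idle: next_alert = own address */

	int rc = gisa_set_iam(0x80);

	printf("rc=%d word0=0x%016llx\n", rc,
	       (unsigned long long)atomic_load(&word0));
	return 0;	/* rc=0 word0=0x1234500000000080 */
}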
+ * + * Returns: the relevant pending ISCs + */ +static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi) +{ + u8 pending_mask, alert_mask; + u64 word, _word; + + do { + word = READ_ONCE(gi->origin->u64.word[0]); + alert_mask = READ_ONCE(gi->alert.mask); + pending_mask = (u8)(word >> 24) & alert_mask; + if (pending_mask) + return pending_mask; + _word = (word & ~0xffUL) | alert_mask; + } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word); + + return 0; +} + +static inline int gisa_in_alert_list(struct kvm_s390_gisa *gisa) +{ + return READ_ONCE(gisa->next_alert) != (u32)(u64)gisa; +} + +static inline void gisa_set_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { set_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); } -static inline u8 kvm_s390_gisa_get_ipm(struct kvm_s390_gisa *gisa) +static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa) { return READ_ONCE(gisa->ipm); } -static inline void kvm_s390_gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) +static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); } -static inline int kvm_s390_gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) +static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); } @@ -245,8 +329,13 @@ static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu) static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu) { - return pending_irqs_no_gisa(vcpu) | - kvm_s390_gisa_get_ipm(vcpu->kvm->arch.gisa) << IRQ_PEND_IO_ISC_7; + struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int; + unsigned long pending_mask; + + pending_mask = pending_irqs_no_gisa(vcpu); + if (gi->origin) + pending_mask |= gisa_get_ipm(gi->origin) << IRQ_PEND_IO_ISC_7; + return pending_mask; } static inline int isc_to_irq_type(unsigned long isc) @@ -318,13 +407,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) static void __set_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT); - set_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); + set_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); } static void __unset_cpu_idle(struct kvm_vcpu *vcpu) { kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT); - clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); + clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); } static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) @@ -345,7 +434,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) { if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_IO_MASK)) return; - else if (psw_ioint_disabled(vcpu)) + if (psw_ioint_disabled(vcpu)) kvm_s390_set_cpuflags(vcpu, CPUSTAT_IO_INT); else vcpu->arch.sie_block->lctl |= LCTL_CR6; @@ -353,7 +442,7 @@ static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) { - if (!(pending_irqs(vcpu) & IRQ_PEND_EXT_MASK)) + if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_EXT_MASK)) return; if (psw_extint_disabled(vcpu)) kvm_s390_set_cpuflags(vcpu, CPUSTAT_EXT_INT); @@ -363,7 +452,7 @@ static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu) { - if (!(pending_irqs(vcpu) & IRQ_PEND_MCHK_MASK)) + if (!(pending_irqs_no_gisa(vcpu) & IRQ_PEND_MCHK_MASK)) return; if (psw_mchk_disabled(vcpu)) vcpu->arch.sie_block->ictl |= ICTL_LPSW; @@ -956,6 +1045,7 @@ 
static int __must_check __deliver_io(struct kvm_vcpu *vcpu, { struct list_head *isc_list; struct kvm_s390_float_interrupt *fi; + struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int; struct kvm_s390_interrupt_info *inti = NULL; struct kvm_s390_io_info io; u32 isc; @@ -998,8 +1088,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, goto out; } - if (vcpu->kvm->arch.gisa && - kvm_s390_gisa_tac_ipm_gisc(vcpu->kvm->arch.gisa, isc)) { + if (gi->origin && gisa_tac_ipm_gisc(gi->origin, isc)) { /* * in case an adapter interrupt was not delivered * in SIE context KVM will handle the delivery @@ -1089,6 +1178,7 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu) int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) { + struct kvm_s390_gisa_interrupt *gi = &vcpu->kvm->arch.gisa_int; u64 sltime; vcpu->stat.exit_wait_state++; @@ -1102,6 +1192,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; /* disabled wait */ } + if (gi->origin && + (gisa_get_ipm_or_restore_iam(gi) & + vcpu->arch.sie_block->gcr[6] >> 24)) + return 0; + if (!ckc_interrupts_enabled(vcpu) && !cpu_timer_interrupts_enabled(vcpu)) { VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); @@ -1533,18 +1628,19 @@ static struct kvm_s390_interrupt_info *get_top_io_int(struct kvm *kvm, static int get_top_gisa_isc(struct kvm *kvm, u64 isc_mask, u32 schid) { + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; unsigned long active_mask; int isc; if (schid) goto out; - if (!kvm->arch.gisa) + if (!gi->origin) goto out; - active_mask = (isc_mask & kvm_s390_gisa_get_ipm(kvm->arch.gisa) << 24) << 32; + active_mask = (isc_mask & gisa_get_ipm(gi->origin) << 24) << 32; while (active_mask) { isc = __fls(active_mask) ^ (BITS_PER_LONG - 1); - if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, isc)) + if (gisa_tac_ipm_gisc(gi->origin, isc)) return isc; clear_bit_inv(isc, &active_mask); } @@ -1567,6 +1663,7 @@ out: struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 isc_mask, u32 schid) { + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_s390_interrupt_info *inti, *tmp_inti; int isc; @@ -1584,7 +1681,7 @@ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, /* both types of interrupts present */ if (int_word_to_isc(inti->io.io_int_word) <= isc) { /* classical IO int with higher priority */ - kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); + gisa_set_ipm_gisc(gi->origin, isc); goto out; } gisa_out: @@ -1596,7 +1693,7 @@ gisa_out: kvm_s390_reinject_io_int(kvm, inti); inti = tmp_inti; } else - kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); + gisa_set_ipm_gisc(gi->origin, isc); out: return inti; } @@ -1685,6 +1782,7 @@ static int __inject_float_mchk(struct kvm *kvm, static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_s390_float_interrupt *fi; struct list_head *list; int isc; @@ -1692,9 +1790,9 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) kvm->stat.inject_io++; isc = int_word_to_isc(inti->io.io_int_word); - if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) { + if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) { VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc); - kvm_s390_gisa_set_ipm_gisc(kvm->arch.gisa, isc); + gisa_set_ipm_gisc(gi->origin, isc); kfree(inti); return 0; } @@ -1726,7 +1824,6 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) */ static void 
__floating_irq_kick(struct kvm *kvm, u64 type) { - struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; struct kvm_vcpu *dst_vcpu; int sigcpu, online_vcpus, nr_tries = 0; @@ -1735,11 +1832,11 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) return; /* find idle VCPUs first, then round robin */ - sigcpu = find_first_bit(fi->idle_mask, online_vcpus); + sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus); if (sigcpu == online_vcpus) { do { - sigcpu = fi->next_rr_cpu; - fi->next_rr_cpu = (fi->next_rr_cpu + 1) % online_vcpus; + sigcpu = kvm->arch.float_int.next_rr_cpu++; + kvm->arch.float_int.next_rr_cpu %= online_vcpus; /* avoid endless loops if all vcpus are stopped */ if (nr_tries++ >= online_vcpus) return; @@ -1753,7 +1850,8 @@ static void __floating_irq_kick(struct kvm *kvm, u64 type) kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT); break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (!(type & KVM_S390_INT_IO_AI_MASK && kvm->arch.gisa)) + if (!(type & KVM_S390_INT_IO_AI_MASK && + kvm->arch.gisa_int.origin)) kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT); break; default: @@ -2003,6 +2101,7 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) { + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_s390_interrupt_info *inti; struct kvm_s390_float_interrupt *fi; struct kvm_s390_irq *buf; @@ -2026,15 +2125,14 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) max_irqs = len / sizeof(struct kvm_s390_irq); - if (kvm->arch.gisa && - kvm_s390_gisa_get_ipm(kvm->arch.gisa)) { + if (gi->origin && gisa_get_ipm(gi->origin)) { for (i = 0; i <= MAX_ISC; i++) { if (n == max_irqs) { /* signal userspace to try again */ ret = -ENOMEM; goto out_nolock; } - if (kvm_s390_gisa_tac_ipm_gisc(kvm->arch.gisa, i)) { + if (gisa_tac_ipm_gisc(gi->origin, i)) { irq = (struct kvm_s390_irq *) &buf[n]; irq->type = KVM_S390_INT_IO(1, 0, 0, 0); irq->u.io.io_int_word = isc_to_int_word(i); @@ -2831,7 +2929,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) { int scn; - unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; unsigned long pending_irqs; struct kvm_s390_irq irq; @@ -2884,27 +2982,278 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) return n; } -void kvm_s390_gisa_clear(struct kvm *kvm) +static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) { - if (kvm->arch.gisa) { - memset(kvm->arch.gisa, 0, sizeof(struct kvm_s390_gisa)); - kvm->arch.gisa->next_alert = (u32)(u64)kvm->arch.gisa; - VM_EVENT(kvm, 3, "gisa 0x%pK cleared", kvm->arch.gisa); + int vcpu_id, online_vcpus = atomic_read(&kvm->online_vcpus); + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + struct kvm_vcpu *vcpu; + + for_each_set_bit(vcpu_id, kvm->arch.idle_mask, online_vcpus) { + vcpu = kvm_get_vcpu(kvm, vcpu_id); + if (psw_ioint_disabled(vcpu)) + continue; + deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask) { + /* lately kicked but not yet running */ + if (test_and_set_bit(vcpu_id, gi->kicked_mask)) + return; + kvm_s390_vcpu_wakeup(vcpu); + return; + } } } +static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer) +{ + struct kvm_s390_gisa_interrupt *gi = + container_of(timer, 
struct kvm_s390_gisa_interrupt, timer); + struct kvm *kvm = + container_of(gi->origin, struct sie_page2, gisa)->kvm; + u8 pending_mask; + + pending_mask = gisa_get_ipm_or_restore_iam(gi); + if (pending_mask) { + __airqs_kick_single_vcpu(kvm, pending_mask); + hrtimer_forward_now(timer, ns_to_ktime(gi->expires)); + return HRTIMER_RESTART; + }; + + return HRTIMER_NORESTART; +} + +#define NULL_GISA_ADDR 0x00000000UL +#define NONE_GISA_ADDR 0x00000001UL +#define GISA_ADDR_MASK 0xfffff000UL + +static void process_gib_alert_list(void) +{ + struct kvm_s390_gisa_interrupt *gi; + struct kvm_s390_gisa *gisa; + struct kvm *kvm; + u32 final, origin = 0UL; + + do { + /* + * If the NONE_GISA_ADDR is still stored in the alert list + * origin, we will leave the outer loop. No further GISA has + * been added to the alert list by millicode while processing + * the current alert list. + */ + final = (origin & NONE_GISA_ADDR); + /* + * Cut off the alert list and store the NONE_GISA_ADDR in the + * alert list origin to avoid further GAL interruptions. + * A new alert list can be build up by millicode in parallel + * for guests not in the yet cut-off alert list. When in the + * final loop, store the NULL_GISA_ADDR instead. This will re- + * enable GAL interruptions on the host again. + */ + origin = xchg(&gib->alert_list_origin, + (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR); + /* + * Loop through the just cut-off alert list and start the + * gisa timers to kick idle vcpus to consume the pending + * interruptions asap. + */ + while (origin & GISA_ADDR_MASK) { + gisa = (struct kvm_s390_gisa *)(u64)origin; + origin = gisa->next_alert; + gisa->next_alert = (u32)(u64)gisa; + kvm = container_of(gisa, struct sie_page2, gisa)->kvm; + gi = &kvm->arch.gisa_int; + if (hrtimer_active(&gi->timer)) + hrtimer_cancel(&gi->timer); + hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL); + } + } while (!final); + +} + +void kvm_s390_gisa_clear(struct kvm *kvm) +{ + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + + if (!gi->origin) + return; + gisa_clear_ipm(gi->origin); + VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin); +} + void kvm_s390_gisa_init(struct kvm *kvm) { - if (css_general_characteristics.aiv) { - kvm->arch.gisa = &kvm->arch.sie_page2->gisa; - VM_EVENT(kvm, 3, "gisa 0x%pK initialized", kvm->arch.gisa); - kvm_s390_gisa_clear(kvm); - } + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + + if (!css_general_characteristics.aiv) + return; + gi->origin = &kvm->arch.sie_page2->gisa; + gi->alert.mask = 0; + spin_lock_init(&gi->alert.ref_lock); + gi->expires = 50 * 1000; /* 50 usec */ + hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + gi->timer.function = gisa_vcpu_kicker; + memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); + gi->origin->next_alert = (u32)(u64)gi->origin; + VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); } void kvm_s390_gisa_destroy(struct kvm *kvm) { - if (!kvm->arch.gisa) + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + + if (!gi->origin) + return; + if (gi->alert.mask) + KVM_EVENT(3, "vm 0x%pK has unexpected iam 0x%02x", + kvm, gi->alert.mask); + while (gisa_in_alert_list(gi->origin)) + cpu_relax(); + hrtimer_cancel(&gi->timer); + gi->origin = NULL; +} + +/** + * kvm_s390_gisc_register - register a guest ISC + * + * @kvm: the kernel vm to work with + * @gisc: the guest interruption sub class to register + * + * The function extends the vm specific alert mask to use. 
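process_gib_alert_list() above is driven entirely by xchg() on the alert-list origin: NONE_GISA_ADDR is installed while a cut-off list is being walked (keeping further GAL interruptions off), and NULL_GISA_ADDR is only written back on the final pass, which re-arms the alert. A compilable user-space sketch of that sentinel protocol, with plain pointers standing in for the 31-bit GISA origin values and printf() standing in for the vcpu kick:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Sentinels mirroring NULL_GISA_ADDR / NONE_GISA_ADDR above. */
#define NULL_ADDR	((uintptr_t)0)	/* empty list, GAL interruption armed */
#define NONE_ADDR	((uintptr_t)1)	/* list cut off, further alerts suppressed */

struct gisa {
	uintptr_t next_alert;	/* next list member, or own address when off-list */
	int id;
};

static _Atomic uintptr_t alert_list_origin;	/* starts out as NULL_ADDR */

/* Producer side (millicode in the real machine): push to the front. */
static void push_alert(struct gisa *g)
{
	g->next_alert = atomic_exchange(&alert_list_origin, (uintptr_t)g);
}

/* Consumer side, modelled on process_gib_alert_list(). */
static void process_alert_list(void)
{
	uintptr_t final, origin = NULL_ADDR;
	struct gisa *g;

	do {
		final = origin & NONE_ADDR;
		origin = atomic_exchange(&alert_list_origin,
					 final ? NULL_ADDR : NONE_ADDR);
		while (origin > NONE_ADDR) {	/* a real address, not a sentinel */
			g = (struct gisa *)origin;
			origin = g->next_alert;
			g->next_alert = (uintptr_t)g;	/* mark "not on the list" */
			printf("kick idle vcpus for gisa %d\n", g->id);
		}
	} while (!final);
}

int main(void)
{
	struct gisa a = { .id = 1 }, b = { .id = 2 }, c = { .id = 3 };

	push_alert(&a);
	push_alert(&b);
	push_alert(&c);
	process_alert_list();	/* visits 3, 2, 1 and leaves origin = NULL_ADDR */
	return 0;
}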
+ * The effective IAM mask in the GISA is updated as well + * in case the GISA is not part of the GIB alert list. + * It will be updated latest when the IAM gets restored + * by gisa_get_ipm_or_restore_iam(). + * + * Returns: the nonspecific ISC (NISC) the gib alert mechanism + * has registered with the channel subsystem. + * -ENODEV in case the vm uses no GISA + * -ERANGE in case the guest ISC is invalid + */ +int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc) +{ + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + + if (!gi->origin) + return -ENODEV; + if (gisc > MAX_ISC) + return -ERANGE; + + spin_lock(&gi->alert.ref_lock); + gi->alert.ref_count[gisc]++; + if (gi->alert.ref_count[gisc] == 1) { + gi->alert.mask |= 0x80 >> gisc; + gisa_set_iam(gi->origin, gi->alert.mask); + } + spin_unlock(&gi->alert.ref_lock); + + return gib->nisc; +} +EXPORT_SYMBOL_GPL(kvm_s390_gisc_register); + +/** + * kvm_s390_gisc_unregister - unregister a guest ISC + * + * @kvm: the kernel vm to work with + * @gisc: the guest interruption sub class to register + * + * The function reduces the vm specific alert mask to use. + * The effective IAM mask in the GISA is updated as well + * in case the GISA is not part of the GIB alert list. + * It will be updated latest when the IAM gets restored + * by gisa_get_ipm_or_restore_iam(). + * + * Returns: the nonspecific ISC (NISC) the gib alert mechanism + * has registered with the channel subsystem. + * -ENODEV in case the vm uses no GISA + * -ERANGE in case the guest ISC is invalid + * -EINVAL in case the guest ISC is not registered + */ +int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc) +{ + struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; + int rc = 0; + + if (!gi->origin) + return -ENODEV; + if (gisc > MAX_ISC) + return -ERANGE; + + spin_lock(&gi->alert.ref_lock); + if (gi->alert.ref_count[gisc] == 0) { + rc = -EINVAL; + goto out; + } + gi->alert.ref_count[gisc]--; + if (gi->alert.ref_count[gisc] == 0) { + gi->alert.mask &= ~(0x80 >> gisc); + gisa_set_iam(gi->origin, gi->alert.mask); + } +out: + spin_unlock(&gi->alert.ref_lock); + + return rc; +} +EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister); + +static void gib_alert_irq_handler(struct airq_struct *airq) +{ + inc_irq_stat(IRQIO_GAL); + process_gib_alert_list(); +} + +static struct airq_struct gib_alert_irq = { + .handler = gib_alert_irq_handler, + .lsi_ptr = &gib_alert_irq.lsi_mask, +}; + +void kvm_s390_gib_destroy(void) +{ + if (!gib) return; - kvm->arch.gisa = NULL; + chsc_sgib(0); + unregister_adapter_interrupt(&gib_alert_irq); + free_page((unsigned long)gib); + gib = NULL; +} + +int kvm_s390_gib_init(u8 nisc) +{ + int rc = 0; + + if (!css_general_characteristics.aiv) { + KVM_EVENT(3, "%s", "gib not initialized, no AIV facility"); + goto out; + } + + gib = (struct kvm_s390_gib *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + if (!gib) { + rc = -ENOMEM; + goto out; + } + + gib_alert_irq.isc = nisc; + if (register_adapter_interrupt(&gib_alert_irq)) { + pr_err("Registering the GIB alert interruption handler failed\n"); + rc = -EIO; + goto out_free_gib; + } + + gib->nisc = nisc; + if (chsc_sgib((u32)(u64)gib)) { + pr_err("Associating the GIB with the AIV facility failed\n"); + free_page((unsigned long)gib); + gib = NULL; + rc = -EIO; + goto out_unreg_gal; + } + + KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc); + goto out; + +out_unreg_gal: + unregister_adapter_interrupt(&gib_alert_irq); +out_free_gib: + free_page((unsigned long)gib); + gib = NULL; +out: + return rc; } diff --git 
a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7f4bc58a53b9..4638303ba6a8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -432,11 +432,18 @@ int kvm_arch_init(void *opaque) /* Register floating interrupt controller interface. */ rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); if (rc) { - pr_err("Failed to register FLIC rc=%d\n", rc); + pr_err("A FLIC registration call failed with rc=%d\n", rc); goto out_debug_unreg; } + + rc = kvm_s390_gib_init(GAL_ISC); + if (rc) + goto out_gib_destroy; + return 0; +out_gib_destroy: + kvm_s390_gib_destroy(); out_debug_unreg: debug_unregister(kvm_s390_dbf); return rc; @@ -444,6 +451,7 @@ out_debug_unreg: void kvm_arch_exit(void) { + kvm_s390_gib_destroy(); debug_unregister(kvm_s390_dbf); } @@ -1258,11 +1266,65 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm, static int kvm_s390_set_processor_subfunc(struct kvm *kvm, struct kvm_device_attr *attr) { - /* - * Once supported by kernel + hw, we have to store the subfunctions - * in kvm->arch and remember that user space configured them. - */ - return -ENXIO; + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) { + mutex_unlock(&kvm->lock); + return -EBUSY; + } + + if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr, + sizeof(struct kvm_s390_vm_cpu_subfunc))) { + mutex_unlock(&kvm->lock); + return -EFAULT; + } + mutex_unlock(&kvm->lock); + + VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], + ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], + ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); + VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); + VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); + VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); + VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); + VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); + VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); + VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); + VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); + VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); + VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); + VM_EVENT(kvm, 3, "SET: 
guest PCC subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); + VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]); + VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); + + return 0; } static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) @@ -1381,12 +1443,56 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm, static int kvm_s390_get_processor_subfunc(struct kvm *kvm, struct kvm_device_attr *attr) { - /* - * Once we can actually configure subfunctions (kernel + hw support), - * we have to check if they were already set by user space, if so copy - * them from kvm->arch. - */ - return -ENXIO; + if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs, + sizeof(struct kvm_s390_vm_cpu_subfunc))) + return -EFAULT; + + VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1], + ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2], + ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]); + VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]); + VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]); + VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]); + VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.km)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]); + VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]); + VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]); + VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]); + VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]); + VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]); + VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]); + VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]); + VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0], + ((unsigned long *) 
&kvm->arch.model.subfuncs.ppno)[1]); + VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0], + ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]); + + return 0; } static int kvm_s390_get_machine_subfunc(struct kvm *kvm, @@ -1395,8 +1501,55 @@ static int kvm_s390_get_machine_subfunc(struct kvm *kvm, if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, sizeof(struct kvm_s390_vm_cpu_subfunc))) return -EFAULT; + + VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.plo)[0], + ((unsigned long *) &kvm_s390_available_subfunc.plo)[1], + ((unsigned long *) &kvm_s390_available_subfunc.plo)[2], + ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]); + VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0], + ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]); + VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0], + ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]); + VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0], + ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]); + VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.km)[0], + ((unsigned long *) &kvm_s390_available_subfunc.km)[1]); + VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0], + ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]); + VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0], + ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]); + VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0], + ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]); + VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0], + ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]); + VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0], + ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]); + VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0], + ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]); + VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0], + ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]); + VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0], + ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]); + VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx", + ((unsigned long *) &kvm_s390_available_subfunc.kma)[0], + ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]); + return 0; } + static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -1514,10 +1667,9 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_PROCESSOR_FEAT: case KVM_S390_VM_CPU_MACHINE_FEAT: case KVM_S390_VM_CPU_MACHINE_SUBFUNC: + case 
KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: ret = 0; break; - /* configuring subfunctions is not supported yet */ - case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: default: ret = -ENXIO; break; @@ -2209,6 +2361,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.sie_page2) goto out_err; + kvm->arch.sie_page2->kvm = kvm; kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list; for (i = 0; i < kvm_s390_fac_size(); i++) { @@ -2218,6 +2371,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] & kvm_s390_fac_base[i]; } + kvm->arch.model.subfuncs = kvm_s390_available_subfunc; /* we are always in czam mode - even on pre z14 machines */ set_kvm_facility(kvm->arch.model.fac_mask, 138); @@ -2812,7 +2966,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->icpua = id; spin_lock_init(&vcpu->arch.local_int.lock); - vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa; + vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin; if (vcpu->arch.sie_block->gd && sclp.has_gisaf) vcpu->arch.sie_block->gd |= GISA_FORMAT1; seqcount_init(&vcpu->arch.cputm_seqcount); @@ -3458,6 +3612,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) kvm_s390_patch_guest_per_regs(vcpu); } + clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask); + vcpu->arch.sie_block->icptcode = 0; cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); @@ -4293,12 +4449,12 @@ static int __init kvm_s390_init(void) int i; if (!sclp.has_sief2) { - pr_info("SIE not available\n"); + pr_info("SIE is not available\n"); return -ENODEV; } if (nested && hpage) { - pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently"); + pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n"); return -EINVAL; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1f6e36cdce0d..6d9448dbd052 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -67,7 +67,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu) static inline int is_vcpu_idle(struct kvm_vcpu *vcpu) { - return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.float_int.idle_mask); + return test_bit(vcpu->vcpu_id, vcpu->kvm->arch.idle_mask); } static inline int kvm_is_ucontrol(struct kvm *kvm) @@ -381,6 +381,8 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, void kvm_s390_gisa_init(struct kvm *kvm); void kvm_s390_gisa_clear(struct kvm *kvm); void kvm_s390_gisa_destroy(struct kvm *kvm); +int kvm_s390_gib_init(u8 nisc); +void kvm_s390_gib_destroy(void); /* implemented in guestdbg.c */ void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index a4a41421c5e2..aca09be2373e 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -938,7 +938,7 @@ static int ubd_add(int n, char **error_out) ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); if (IS_ERR(ubd_dev->queue)) { err = PTR_ERR(ubd_dev->queue); - goto out_cleanup; + goto out_cleanup_tags; } ubd_dev->queue->queuedata = ubd_dev; @@ -968,8 +968,8 @@ out: out_cleanup_tags: blk_mq_free_tag_set(&ubd_dev->tag_set); -out_cleanup: - blk_cleanup_queue(ubd_dev->queue); + if (!(IS_ERR(ubd_dev->queue))) + blk_cleanup_queue(ubd_dev->queue); goto out; } diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index d2c17dd74620..b3f7b3ca896d 100644 --- 
a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -16,14 +16,12 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> -#include <sys/types.h> #include <sys/socket.h> #include <net/ethernet.h> #include <netinet/ip.h> #include <netinet/ether.h> #include <linux/if_ether.h> #include <linux/if_packet.h> -#include <sys/socket.h> #include <sys/wait.h> #include <sys/uio.h> #include <linux/virtio_net.h> @@ -31,7 +29,6 @@ #include <stdlib.h> #include <os.h> #include <um_malloc.h> -#include <sys/uio.h> #include "vector_user.h" #define ID_GRE 0 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 180373360e34..a5db4475e72d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,6 +35,7 @@ #include <asm/msr-index.h> #include <asm/asm.h> #include <asm/kvm_page_track.h> +#include <asm/kvm_vcpu_regs.h> #include <asm/hyperv-tlfs.h> #define KVM_MAX_VCPUS 288 @@ -137,23 +138,23 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 enum kvm_reg { - VCPU_REGS_RAX = 0, - VCPU_REGS_RCX = 1, - VCPU_REGS_RDX = 2, - VCPU_REGS_RBX = 3, - VCPU_REGS_RSP = 4, - VCPU_REGS_RBP = 5, - VCPU_REGS_RSI = 6, - VCPU_REGS_RDI = 7, + VCPU_REGS_RAX = __VCPU_REGS_RAX, + VCPU_REGS_RCX = __VCPU_REGS_RCX, + VCPU_REGS_RDX = __VCPU_REGS_RDX, + VCPU_REGS_RBX = __VCPU_REGS_RBX, + VCPU_REGS_RSP = __VCPU_REGS_RSP, + VCPU_REGS_RBP = __VCPU_REGS_RBP, + VCPU_REGS_RSI = __VCPU_REGS_RSI, + VCPU_REGS_RDI = __VCPU_REGS_RDI, #ifdef CONFIG_X86_64 - VCPU_REGS_R8 = 8, - VCPU_REGS_R9 = 9, - VCPU_REGS_R10 = 10, - VCPU_REGS_R11 = 11, - VCPU_REGS_R12 = 12, - VCPU_REGS_R13 = 13, - VCPU_REGS_R14 = 14, - VCPU_REGS_R15 = 15, + VCPU_REGS_R8 = __VCPU_REGS_R8, + VCPU_REGS_R9 = __VCPU_REGS_R9, + VCPU_REGS_R10 = __VCPU_REGS_R10, + VCPU_REGS_R11 = __VCPU_REGS_R11, + VCPU_REGS_R12 = __VCPU_REGS_R12, + VCPU_REGS_R13 = __VCPU_REGS_R13, + VCPU_REGS_R14 = __VCPU_REGS_R14, + VCPU_REGS_R15 = __VCPU_REGS_R15, #endif VCPU_REGS_RIP, NR_VCPU_REGS @@ -319,6 +320,7 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; bool unsync; + bool mmio_cached; /* * The following two entries are used to key the shadow page in the @@ -333,10 +335,6 @@ struct kvm_mmu_page { int root_count; /* Currently serving as active root */ unsigned int unsync_children; struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ - - /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ - unsigned long mmu_valid_gen; - DECLARE_BITMAP(unsync_child_bitmap, 512); #ifdef CONFIG_X86_32 @@ -848,13 +846,11 @@ struct kvm_arch { unsigned int n_requested_mmu_pages; unsigned int n_max_mmu_pages; unsigned int indirect_shadow_pages; - unsigned long mmu_valid_gen; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. 
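In the kvm_host.h hunk above, the kvm_reg enumerators are pinned to __VCPU_REGS_* preprocessor constants (supplied by the new asm/kvm_vcpu_regs.h a little further down), presumably so the same indices can be consumed from assembly: plain #defines survive into .S files run through the C preprocessor, enum values do not. A small stand-alone illustration of the pattern; only the __VCPU_REGS_* names mirror the new header, the regs[] array and RAX_OFF are hypothetical:

#include <stdio.h>

/* Hypothetical shared header: visible to both C and preprocessed assembly. */
#define __VCPU_REGS_RAX 0
#define __VCPU_REGS_RCX 1
#define __VCPU_REGS_RDX 2
#define __VCPU_REGS_RBX 3

/* C side keeps its enum, now anchored to the shared constants. */
enum kvm_reg {
	VCPU_REGS_RAX = __VCPU_REGS_RAX,
	VCPU_REGS_RCX = __VCPU_REGS_RCX,
	VCPU_REGS_RDX = __VCPU_REGS_RDX,
	VCPU_REGS_RBX = __VCPU_REGS_RBX,
};

/* What an assembly stub could compute, e.g. "movq RAX_OFF(%rdi), %rax". */
#define RAX_OFF (__VCPU_REGS_RAX * 8)

int main(void)
{
	unsigned long regs[4] = { 0 };

	regs[VCPU_REGS_RAX] = 0x1234;
	printf("RAX: index %d, byte offset %d, value %#lx\n",
	       VCPU_REGS_RAX, RAX_OFF, regs[VCPU_REGS_RAX]);
	return 0;
}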
*/ struct list_head active_mmu_pages; - struct list_head zapped_obsolete_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; @@ -1255,7 +1251,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); -void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots); +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); diff --git a/arch/x86/include/asm/kvm_vcpu_regs.h b/arch/x86/include/asm/kvm_vcpu_regs.h new file mode 100644 index 000000000000..1af2cb59233b --- /dev/null +++ b/arch/x86/include/asm/kvm_vcpu_regs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_KVM_VCPU_REGS_H +#define _ASM_X86_KVM_VCPU_REGS_H + +#define __VCPU_REGS_RAX 0 +#define __VCPU_REGS_RCX 1 +#define __VCPU_REGS_RDX 2 +#define __VCPU_REGS_RBX 3 +#define __VCPU_REGS_RSP 4 +#define __VCPU_REGS_RBP 5 +#define __VCPU_REGS_RSI 6 +#define __VCPU_REGS_RDI 7 + +#ifdef CONFIG_X86_64 +#define __VCPU_REGS_R8 8 +#define __VCPU_REGS_R9 9 +#define __VCPU_REGS_R10 10 +#define __VCPU_REGS_R11 11 +#define __VCPU_REGS_R12 12 +#define __VCPU_REGS_R13 13 +#define __VCPU_REGS_R14 14 +#define __VCPU_REGS_R15 15 +#endif + +#endif /* _ASM_X86_KVM_VCPU_REGS_H */ diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e811d4d1c824..904494b924c1 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -104,12 +104,8 @@ static u64 kvm_sched_clock_read(void) static inline void kvm_sched_clock_init(bool stable) { - if (!stable) { - pv_ops.time.sched_clock = kvm_clock_read; + if (!stable) clear_sched_clock_stable(); - return; - } - kvm_sched_clock_offset = kvm_clock_read(); pv_ops.time.sched_clock = kvm_sched_clock_read; @@ -355,6 +351,20 @@ void __init kvmclock_init(void) machine_ops.crash_shutdown = kvm_crash_shutdown; #endif kvm_get_preset_lpj(); + + /* + * X86_FEATURE_NONSTOP_TSC is TSC runs at constant rate + * with P/T states and does not stop in deep C-states. + * + * Invariant TSC exposed by host means kvmclock is not necessary: + * can use TSC as clocksource. 
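The kvmclock comment above introduces the rating change that follows just below (kvm_clock.rating dropped to 299 when the host exposes a constant, nonstop TSC). The kernel prefers the registered clocksource with the highest rating; kvmclock normally registers at 400 and, from memory, the raw TSC clocksource at 300, so 299 is just low enough to let the TSC win without unregistering kvmclock. A toy selection loop to make the comparison concrete; only the 299 comes from the hunk, the other ratings are assumptions:

#include <stdio.h>

struct clocksource {
	const char *name;
	int rating;	/* higher rating wins */
};

/* 299 is taken from the hunk; the other ratings are assumptions. */
static struct clocksource sources[] = {
	{ "kvm-clock", 299 },	/* assumed to have been 400 before the check */
	{ "tsc",       300 },
};

int main(void)
{
	struct clocksource *best = &sources[0];

	for (size_t i = 1; i < sizeof(sources) / sizeof(sources[0]); i++)
		if (sources[i].rating > best->rating)
			best = &sources[i];

	printf("selected clocksource: %s\n", best->name);	/* tsc */
	return 0;
}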
+ * + */ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && + boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && + !check_tsc_unstable()) + kvm_clock.rating = 299; + clocksource_register_hz(&kvm_clock, NSEC_PER_SEC); pv_info.name = "KVM"; } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c07958b59f50..fd3951638ae4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -405,7 +405,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | - F(CLDEMOTE); + F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 89d20ed1d2e8..27c43525a05f 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1729,7 +1729,7 @@ static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) mutex_lock(&hv->hv_lock); ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); mutex_unlock(&hv->hv_lock); if (ret >= 0) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index af192895b1fc..4a6dc54cc12b 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -653,7 +653,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) pid_t pid_nr; int ret; - pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL); + pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL_ACCOUNT); if (!pit) return NULL; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index bdcd4139eca9..8b38bb4868a6 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -583,7 +583,7 @@ int kvm_pic_init(struct kvm *kvm) struct kvm_pic *s; int ret; - s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); + s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL_ACCOUNT); if (!s) return -ENOMEM; spin_lock_init(&s->lock); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 4e822ad363f3..1add1bc881e2 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -622,7 +622,7 @@ int kvm_ioapic_init(struct kvm *kvm) struct kvm_ioapic *ioapic; int ret; - ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); + ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL_ACCOUNT); if (!ioapic) return -ENOMEM; spin_lock_init(&ioapic->lock); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4b6c2da7265c..991fdf7fc17f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -181,7 +181,8 @@ static void recalculate_apic_map(struct kvm *kvm) max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); new = kvzalloc(sizeof(struct kvm_apic_map) + - sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL); + sizeof(struct kvm_lapic *) * ((u64)max_id + 1), + GFP_KERNEL_ACCOUNT); if (!new) goto out; @@ -2259,13 +2260,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) ASSERT(vcpu != NULL); apic_debug("apic_init %d\n", vcpu->vcpu_id); - apic = kzalloc(sizeof(*apic), GFP_KERNEL); + apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT); if (!apic) goto nomem; vcpu->arch.apic = apic; - apic->regs = (void *)get_zeroed_page(GFP_KERNEL); + apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (!apic->regs) { printk(KERN_ERR "malloc apic regs error for vcpu %x\n", vcpu->vcpu_id); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f2d1d230d5b8..7837ab001d80 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -109,9 +109,11 @@ module_param(dbg, bool, 
0644); (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1)) -#define PT64_BASE_ADDR_MASK __sme_clr((((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))) -#define PT64_DIR_BASE_ADDR_MASK \ - (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1)) +#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK +#define PT64_BASE_ADDR_MASK (physical_mask & ~(u64)(PAGE_SIZE-1)) +#else +#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1)) +#endif #define PT64_LVL_ADDR_MASK(level) \ (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \ * PT64_LEVEL_BITS))) - 1)) @@ -330,53 +332,56 @@ static inline bool is_access_track_spte(u64 spte) } /* - * the low bit of the generation number is always presumed to be zero. - * This disables mmio caching during memslot updates. The concept is - * similar to a seqcount but instead of retrying the access we just punt - * and ignore the cache. + * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of + * the memslots generation and is derived as follows: * - * spte bits 3-11 are used as bits 1-9 of the generation number, - * the bits 52-61 are used as bits 10-19 of the generation number. + * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11 + * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61 + * + * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in + * the MMIO generation number, as doing so would require stealing a bit from + * the "real" generation number and thus effectively halve the maximum number + * of MMIO generations that can be handled before encountering a wrap (which + * requires a full MMU zap). The flag is instead explicitly queried when + * checking for MMIO spte cache hits. */ -#define MMIO_SPTE_GEN_LOW_SHIFT 2 -#define MMIO_SPTE_GEN_HIGH_SHIFT 52 +#define MMIO_SPTE_GEN_MASK GENMASK_ULL(18, 0) -#define MMIO_GEN_SHIFT 20 -#define MMIO_GEN_LOW_SHIFT 10 -#define MMIO_GEN_LOW_MASK ((1 << MMIO_GEN_LOW_SHIFT) - 2) -#define MMIO_GEN_MASK ((1 << MMIO_GEN_SHIFT) - 1) +#define MMIO_SPTE_GEN_LOW_START 3 +#define MMIO_SPTE_GEN_LOW_END 11 +#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ + MMIO_SPTE_GEN_LOW_START) -static u64 generation_mmio_spte_mask(unsigned int gen) +#define MMIO_SPTE_GEN_HIGH_START 52 +#define MMIO_SPTE_GEN_HIGH_END 61 +#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ + MMIO_SPTE_GEN_HIGH_START) +static u64 generation_mmio_spte_mask(u64 gen) { u64 mask; - WARN_ON(gen & ~MMIO_GEN_MASK); + WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); - mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT; - mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT; + mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; + mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; return mask; } -static unsigned int get_mmio_spte_generation(u64 spte) +static u64 get_mmio_spte_generation(u64 spte) { - unsigned int gen; + u64 gen; spte &= ~shadow_mmio_mask; - gen = (spte >> MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_GEN_LOW_MASK; - gen |= (spte >> MMIO_SPTE_GEN_HIGH_SHIFT) << MMIO_GEN_LOW_SHIFT; + gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; + gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; return gen; } -static unsigned int kvm_current_mmio_generation(struct kvm_vcpu *vcpu) -{ - return kvm_vcpu_memslots(vcpu)->generation & MMIO_GEN_MASK; -} - static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, unsigned access) { - unsigned int gen = 
kvm_current_mmio_generation(vcpu); + u64 gen = kvm_vcpu_memslots(vcpu)->generation & MMIO_SPTE_GEN_MASK; u64 mask = generation_mmio_spte_mask(gen); u64 gpa = gfn << PAGE_SHIFT; @@ -386,6 +391,8 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn, mask |= (gpa & shadow_nonpresent_or_rsvd_mask) << shadow_nonpresent_or_rsvd_mask_len; + page_header(__pa(sptep))->mmio_cached = true; + trace_mark_mmio_spte(sptep, gfn, access, gen); mmu_spte_set(sptep, mask); } @@ -407,7 +414,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte) static unsigned get_mmio_spte_access(u64 spte) { - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask; + u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask; return (spte & ~mask) & ~PAGE_MASK; } @@ -424,9 +431,13 @@ static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) { - unsigned int kvm_gen, spte_gen; + u64 kvm_gen, spte_gen, gen; - kvm_gen = kvm_current_mmio_generation(vcpu); + gen = kvm_vcpu_memslots(vcpu)->generation; + if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS)) + return false; + + kvm_gen = gen & MMIO_SPTE_GEN_MASK; spte_gen = get_mmio_spte_generation(spte); trace_check_mmio_spte(spte, kvm_gen, spte_gen); @@ -959,7 +970,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, if (cache->nobjs >= min) return 0; while (cache->nobjs < ARRAY_SIZE(cache->objects)) { - obj = kmem_cache_zalloc(base_cache, GFP_KERNEL); + obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT); if (!obj) return cache->nobjs >= min ? 0 : -ENOMEM; cache->objects[cache->nobjs++] = obj; @@ -2049,12 +2060,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct if (!direct) sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); - - /* - * The active_mmu_pages list is the FIFO list, do not move the - * page until it is zapped. kvm_zap_obsolete_pages depends on - * this feature. See the comments in kvm_zap_obsolete_pages(). - */ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; @@ -2195,23 +2200,15 @@ static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) --kvm->stat.mmu_unsync; } -static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, - struct list_head *invalid_list); +static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, + struct list_head *invalid_list); static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); -/* - * NOTE: we should pay more attention on the zapped-obsolete page - * (is_obsolete_sp(sp) && sp->role.invalid) when you do hash list walk - * since it has been deleted from active_mmu_pages but still can be found - * at hast list. - * - * for_each_valid_sp() has skipped that kind of pages. 
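
For reference, a minimal userspace sketch of the bit layout described in the MMIO-generation comment above: generation bits 0-8 live in spte bits 3-11 and bits 9-18 in spte bits 52-61. GENMASK_ULL() and the helpers here are local stand-ins, not the kernel's definitions, and placing gen bit 9 at spte bit 52 takes a left shift of 52 - 9 = 43.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l)  (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define GEN_MASK    GENMASK_ULL(18, 0)
#define LOW_START   3
#define LOW_MASK    GENMASK_ULL(11, 3)          /* holds gen[0:8]  */
#define HIGH_START  52
#define HIGH_MASK   GENMASK_ULL(61, 52)         /* holds gen[9:18] */
#define HIGH_SHIFT  (HIGH_START - 9)            /* gen bit 9 -> spte bit 52 */

static uint64_t pack_gen(uint64_t gen)
{
        return ((gen << LOW_START) & LOW_MASK) |
               ((gen << HIGH_SHIFT) & HIGH_MASK);
}

static uint64_t unpack_gen(uint64_t spte)
{
        return ((spte & LOW_MASK) >> LOW_START) |
               ((spte & HIGH_MASK) >> HIGH_SHIFT);
}

int main(void)
{
        uint64_t gen;

        for (gen = 0; gen <= GEN_MASK; gen += 0x3ff)
                assert(unpack_gen(pack_gen(gen)) == gen);

        printf("19-bit generation round-trips through spte bits 3-11/52-61\n");
        return 0;
}
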
- */ #define for_each_valid_sp(_kvm, _sp, _gfn) \ hlist_for_each_entry(_sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ - if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \ + if ((_sp)->role.invalid) { \ } else #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ @@ -2231,18 +2228,28 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return true; } +static bool kvm_mmu_remote_flush_or_zap(struct kvm *kvm, + struct list_head *invalid_list, + bool remote_flush) +{ + if (!remote_flush && !list_empty(invalid_list)) + return false; + + if (!list_empty(invalid_list)) + kvm_mmu_commit_zap_page(kvm, invalid_list); + else + kvm_flush_remote_tlbs(kvm); + return true; +} + static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, struct list_head *invalid_list, bool remote_flush, bool local_flush) { - if (!list_empty(invalid_list)) { - kvm_mmu_commit_zap_page(vcpu->kvm, invalid_list); + if (kvm_mmu_remote_flush_or_zap(vcpu->kvm, invalid_list, remote_flush)) return; - } - if (remote_flush) - kvm_flush_remote_tlbs(vcpu->kvm); - else if (local_flush) + if (local_flush) kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } @@ -2253,11 +2260,6 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { } static void mmu_audit_disable(void) { } #endif -static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); -} - static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { @@ -2482,7 +2484,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (level > PT_PAGE_TABLE_LEVEL && need_sync) flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); } - sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); @@ -2668,17 +2669,22 @@ static int mmu_zap_unsync_children(struct kvm *kvm, return zapped; } -static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, - struct list_head *invalid_list) +static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, + struct kvm_mmu_page *sp, + struct list_head *invalid_list, + int *nr_zapped) { - int ret; + bool list_unstable; trace_kvm_mmu_prepare_zap_page(sp); ++kvm->stat.mmu_shadow_zapped; - ret = mmu_zap_unsync_children(kvm, sp, invalid_list); + *nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list); kvm_mmu_page_unlink_children(kvm, sp); kvm_mmu_unlink_parents(kvm, sp); + /* Zapping children means active_mmu_pages has become unstable. */ + list_unstable = *nr_zapped; + if (!sp->role.invalid && !sp->role.direct) unaccount_shadowed(kvm, sp); @@ -2686,22 +2692,27 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, kvm_unlink_unsync_page(kvm, sp); if (!sp->root_count) { /* Count self */ - ret++; + (*nr_zapped)++; list_move(&sp->link, invalid_list); kvm_mod_used_mmu_pages(kvm, -1); } else { list_move(&sp->link, &kvm->arch.active_mmu_pages); - /* - * The obsolete pages can not be used on any vcpus. - * See the comments in kvm_mmu_invalidate_zap_all_pages(). 
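
The flush-or-commit decision above can be read as: committing a non-empty invalid list already implies the remote TLB flush, so an explicit flush is only issued when a flush was requested with nothing queued, and nothing happens when neither is true. A small sketch of that decision table (plain C, with counters standing in for the commit and flush calls):

#include <stdbool.h>
#include <stdio.h>

static bool remote_flush_or_zap(bool have_invalid_pages, bool remote_flush,
                                unsigned *commits, unsigned *flushes)
{
        if (!remote_flush && !have_invalid_pages)
                return false;                   /* nothing to do */

        if (have_invalid_pages)
                (*commits)++;                   /* commit zap; flush is implied */
        else
                (*flushes)++;                   /* plain remote TLB flush */
        return true;
}

int main(void)
{
        unsigned commits = 0, flushes = 0;

        remote_flush_or_zap(false, false, &commits, &flushes);
        remote_flush_or_zap(true,  false, &commits, &flushes);
        remote_flush_or_zap(false, true,  &commits, &flushes);
        remote_flush_or_zap(true,  true,  &commits, &flushes);

        printf("commits=%u flushes=%u\n", commits, flushes);   /* commits=2 flushes=1 */
        return 0;
}
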
- */ - if (!sp->role.invalid && !is_obsolete_sp(kvm, sp)) + if (!sp->role.invalid) kvm_reload_remote_mmus(kvm); } sp->role.invalid = 1; - return ret; + return list_unstable; +} + +static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, + struct list_head *invalid_list) +{ + int nr_zapped; + + __kvm_mmu_prepare_zap_page(kvm, sp, invalid_list, &nr_zapped); + return nr_zapped; } static void kvm_mmu_commit_zap_page(struct kvm *kvm, @@ -3703,7 +3714,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) u64 *lm_root; - lm_root = (void*)get_zeroed_page(GFP_KERNEL); + lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (lm_root == NULL) return 1; @@ -4204,14 +4215,6 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, return false; if (cached_root_available(vcpu, new_cr3, new_role)) { - /* - * It is possible that the cached previous root page is - * obsolete because of a change in the MMU - * generation number. However, that is accompanied by - * KVM_REQ_MMU_RELOAD, which will free the root that we - * have set here and allocate a new one. - */ - kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); if (!skip_tlb_flush) { kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); @@ -5486,6 +5489,76 @@ void kvm_disable_tdp(void) } EXPORT_SYMBOL_GPL(kvm_disable_tdp); + +/* The return value indicates if tlb flush on all vcpus is needed. */ +typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); + +/* The caller should hold mmu-lock before calling this function. */ +static __always_inline bool +slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, int start_level, int end_level, + gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) +{ + struct slot_rmap_walk_iterator iterator; + bool flush = false; + + for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, + end_gfn, &iterator) { + if (iterator.rmap) + flush |= fn(kvm, iterator.rmap); + + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { + if (flush && lock_flush_tlb) { + kvm_flush_remote_tlbs(kvm); + flush = false; + } + cond_resched_lock(&kvm->mmu_lock); + } + } + + if (flush && lock_flush_tlb) { + kvm_flush_remote_tlbs(kvm); + flush = false; + } + + return flush; +} + +static __always_inline bool +slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, int start_level, int end_level, + bool lock_flush_tlb) +{ + return slot_handle_level_range(kvm, memslot, fn, start_level, + end_level, memslot->base_gfn, + memslot->base_gfn + memslot->npages - 1, + lock_flush_tlb); +} + +static __always_inline bool +slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, bool lock_flush_tlb) +{ + return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, + PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); +} + +static __always_inline bool +slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, bool lock_flush_tlb) +{ + return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, + PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); +} + +static __always_inline bool +slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, + slot_level_handler fn, bool lock_flush_tlb) +{ + return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, + PT_PAGE_TABLE_LEVEL, lock_flush_tlb); +} + static void free_mmu_pages(struct kvm_vcpu *vcpu) { free_page((unsigned long)vcpu->arch.mmu->pae_root); @@ -5505,7 +5578,7 @@ static int 
alloc_mmu_pages(struct kvm_vcpu *vcpu) * Therefore we need to allocate shadow page tables in the first * 4GB of memory, which happens to fit the DMA32 zone. */ - page = alloc_page(GFP_KERNEL | __GFP_DMA32); + page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); if (!page) return -ENOMEM; @@ -5543,105 +5616,62 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, struct kvm_page_track_notifier_node *node) { - kvm_mmu_invalidate_zap_all_pages(kvm); -} - -void kvm_mmu_init_vm(struct kvm *kvm) -{ - struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; - - node->track_write = kvm_mmu_pte_write; - node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; - kvm_page_track_register_notifier(kvm, node); -} + struct kvm_mmu_page *sp; + LIST_HEAD(invalid_list); + unsigned long i; + bool flush; + gfn_t gfn; -void kvm_mmu_uninit_vm(struct kvm *kvm) -{ - struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + spin_lock(&kvm->mmu_lock); - kvm_page_track_unregister_notifier(kvm, node); -} + if (list_empty(&kvm->arch.active_mmu_pages)) + goto out_unlock; -/* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); + flush = slot_handle_all_level(kvm, slot, kvm_zap_rmapp, false); -/* The caller should hold mmu-lock before calling this function. */ -static __always_inline bool -slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, int start_level, int end_level, - gfn_t start_gfn, gfn_t end_gfn, bool lock_flush_tlb) -{ - struct slot_rmap_walk_iterator iterator; - bool flush = false; + for (i = 0; i < slot->npages; i++) { + gfn = slot->base_gfn + i; - for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn, - end_gfn, &iterator) { - if (iterator.rmap) - flush |= fn(kvm, iterator.rmap); + for_each_valid_sp(kvm, sp, gfn) { + if (sp->gfn != gfn) + continue; + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + } if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { - if (flush && lock_flush_tlb) { - kvm_flush_remote_tlbs(kvm); - flush = false; - } + kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); + flush = false; cond_resched_lock(&kvm->mmu_lock); } } + kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); - if (flush && lock_flush_tlb) { - kvm_flush_remote_tlbs(kvm); - flush = false; - } - - return flush; +out_unlock: + spin_unlock(&kvm->mmu_lock); } -static __always_inline bool -slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, int start_level, int end_level, - bool lock_flush_tlb) +void kvm_mmu_init_vm(struct kvm *kvm) { - return slot_handle_level_range(kvm, memslot, fn, start_level, - end_level, memslot->base_gfn, - memslot->base_gfn + memslot->npages - 1, - lock_flush_tlb); -} + struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; -static __always_inline bool -slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, bool lock_flush_tlb) -{ - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); + node->track_write = kvm_mmu_pte_write; + node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot; + kvm_page_track_register_notifier(kvm, node); } -static __always_inline bool -slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, bool lock_flush_tlb) +void 
kvm_mmu_uninit_vm(struct kvm *kvm) { - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); -} + struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; -static __always_inline bool -slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, - slot_level_handler fn, bool lock_flush_tlb) -{ - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, - PT_PAGE_TABLE_LEVEL, lock_flush_tlb); + kvm_page_track_unregister_notifier(kvm, node); } void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - bool flush_tlb = true; - bool flush = false; int i; - if (kvm_available_flush_tlb_with_range()) - flush_tlb = false; - spin_lock(&kvm->mmu_lock); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { slots = __kvm_memslots(kvm, i); @@ -5653,17 +5683,12 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) if (start >= end) continue; - flush |= slot_handle_level_range(kvm, memslot, - kvm_zap_rmapp, PT_PAGE_TABLE_LEVEL, - PT_MAX_HUGEPAGE_LEVEL, start, - end - 1, flush_tlb); + slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, + PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL, + start, end - 1, true); } } - if (flush) - kvm_flush_remote_tlbs_with_address(kvm, gfn_start, - gfn_end - gfn_start + 1); - spin_unlock(&kvm->mmu_lock); } @@ -5815,101 +5840,58 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty); -#define BATCH_ZAP_PAGES 10 -static void kvm_zap_obsolete_pages(struct kvm *kvm) +static void __kvm_mmu_zap_all(struct kvm *kvm, bool mmio_only) { struct kvm_mmu_page *sp, *node; - int batch = 0; + LIST_HEAD(invalid_list); + int ign; + spin_lock(&kvm->mmu_lock); restart: - list_for_each_entry_safe_reverse(sp, node, - &kvm->arch.active_mmu_pages, link) { - int ret; - - /* - * No obsolete page exists before new created page since - * active_mmu_pages is the FIFO list. - */ - if (!is_obsolete_sp(kvm, sp)) - break; - - /* - * Since we are reversely walking the list and the invalid - * list will be moved to the head, skip the invalid page - * can help us to avoid the infinity list walking. - */ - if (sp->role.invalid) + list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) { + if (mmio_only && !sp->mmio_cached) continue; - - /* - * Need not flush tlb since we only zap the sp with invalid - * generation number. - */ - if (batch >= BATCH_ZAP_PAGES && - cond_resched_lock(&kvm->mmu_lock)) { - batch = 0; + if (sp->role.invalid && sp->root_count) + continue; + if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign)) { + WARN_ON_ONCE(mmio_only); goto restart; } - - ret = kvm_mmu_prepare_zap_page(kvm, sp, - &kvm->arch.zapped_obsolete_pages); - batch += ret; - - if (ret) + if (cond_resched_lock(&kvm->mmu_lock)) goto restart; } - /* - * Should flush tlb before free page tables since lockless-walking - * may use the pages. - */ - kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages); -} - -/* - * Fast invalidate all shadow pages and use lock-break technique - * to zap obsolete pages. - * - * It's required when memslot is being deleted or VM is being - * destroyed, in these cases, we should ensure that KVM MMU does - * not use any resource of the being-deleted slot or all slots - * after calling the function. 
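
The zap loops above bound how long mmu_lock is held by flushing pending work and calling cond_resched_lock() at intervals. A userspace analogue of that lock-break pattern, with a pthread mutex standing in for mmu_lock and a counter standing in for the pending invalid list:

#include <pthread.h>
#include <stdio.h>

#define BATCH 128

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void flush_pending(int *pending)
{
        /* stands in for committing the invalid list / remote TLB flush */
        *pending = 0;
}

static void scan_with_lock_break(int nr_items)
{
        int i, pending = 0;

        pthread_mutex_lock(&lock);
        for (i = 0; i < nr_items; i++) {
                pending++;                      /* pretend one entry was zapped */

                if (pending >= BATCH) {
                        /*
                         * Flush before dropping the lock: once it is released,
                         * other threads may rely on the zapped entries being gone.
                         */
                        flush_pending(&pending);
                        pthread_mutex_unlock(&lock);
                        /* lock is free here, so waiters can make progress */
                        pthread_mutex_lock(&lock);
                }
        }
        flush_pending(&pending);
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        scan_with_lock_break(1000);
        printf("scan done\n");
        return 0;
}
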
- */ -void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm) -{ - spin_lock(&kvm->mmu_lock); - trace_kvm_mmu_invalidate_zap_all_pages(kvm); - kvm->arch.mmu_valid_gen++; - - /* - * Notify all vcpus to reload its shadow page table - * and flush TLB. Then all vcpus will switch to new - * shadow page table with the new mmu_valid_gen. - * - * Note: we should do this under the protection of - * mmu-lock, otherwise, vcpu would purge shadow page - * but miss tlb flush. - */ - kvm_reload_remote_mmus(kvm); - - kvm_zap_obsolete_pages(kvm); + kvm_mmu_commit_zap_page(kvm, &invalid_list); spin_unlock(&kvm->mmu_lock); } -static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm) +void kvm_mmu_zap_all(struct kvm *kvm) { - return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages)); + return __kvm_mmu_zap_all(kvm, false); } -void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots) +void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen) { + WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS); + + gen &= MMIO_SPTE_GEN_MASK; + /* - * The very rare case: if the generation-number is round, + * Generation numbers are incremented in multiples of the number of + * address spaces in order to provide unique generations across all + * address spaces. Strip what is effectively the address space + * modifier prior to checking for a wrap of the MMIO generation so + * that a wrap in any address space is detected. + */ + gen &= ~((u64)KVM_ADDRESS_SPACE_NUM - 1); + + /* + * The very rare case: if the MMIO generation number has wrapped, * zap all shadow pages. */ - if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) { + if (unlikely(gen == 0)) { kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n"); - kvm_mmu_invalidate_zap_all_pages(kvm); + __kvm_mmu_zap_all(kvm, true); } } @@ -5940,24 +5922,16 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) * want to shrink a VM that only started to populate its MMU * anyway. 
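
A sketch of the wrap test in kvm_mmu_invalidate_mmio_sptes() above, assuming two address spaces (x86 with SMM) and a 19-bit MMIO generation; the in-progress flag bit used here is a stand-in value, not the kernel's definition:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define UPDATE_IN_PROGRESS      (1ULL << 63)    /* stand-in flag bit */
#define MMIO_GEN_MASK           ((1ULL << 19) - 1)
#define NR_ADDRESS_SPACES       2ULL

static bool mmio_gen_wrapped(uint64_t slots_gen)
{
        uint64_t gen = slots_gen & ~UPDATE_IN_PROGRESS;

        gen &= MMIO_GEN_MASK;
        /* strip the address-space modifier so a wrap in any space is seen */
        gen &= ~(NR_ADDRESS_SPACES - 1);
        return gen == 0;
}

int main(void)
{
        printf("%d %d %d\n",
               mmio_gen_wrapped(0),             /* initial value: treated as wrap */
               mmio_gen_wrapped(4),             /* normal case: no zap            */
               mmio_gen_wrapped(1ULL << 19));   /* 19-bit wrap: zap MMIO sptes    */
        return 0;
}
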
*/ - if (!kvm->arch.n_used_mmu_pages && - !kvm_has_zapped_obsolete_pages(kvm)) + if (!kvm->arch.n_used_mmu_pages) continue; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - if (kvm_has_zapped_obsolete_pages(kvm)) { - kvm_mmu_commit_zap_page(kvm, - &kvm->arch.zapped_obsolete_pages); - goto unlock; - } - if (prepare_zap_oldest_mmu_page(kvm, &invalid_list)) freed++; kvm_mmu_commit_zap_page(kvm, &invalid_list); -unlock: spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index c7b333147c4a..bbdc60f2fae8 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -203,7 +203,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, return -(u32)fault & errcode; } -void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm); void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index c73bf4e4988c..9f6c855a0043 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -8,18 +8,16 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvmmmu -#define KVM_MMU_PAGE_FIELDS \ - __field(unsigned long, mmu_valid_gen) \ - __field(__u64, gfn) \ - __field(__u32, role) \ - __field(__u32, root_count) \ +#define KVM_MMU_PAGE_FIELDS \ + __field(__u64, gfn) \ + __field(__u32, role) \ + __field(__u32, root_count) \ __field(bool, unsync) -#define KVM_MMU_PAGE_ASSIGN(sp) \ - __entry->mmu_valid_gen = sp->mmu_valid_gen; \ - __entry->gfn = sp->gfn; \ - __entry->role = sp->role.word; \ - __entry->root_count = sp->root_count; \ +#define KVM_MMU_PAGE_ASSIGN(sp) \ + __entry->gfn = sp->gfn; \ + __entry->role = sp->role.word; \ + __entry->root_count = sp->root_count; \ __entry->unsync = sp->unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ @@ -31,9 +29,8 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gen %lx gfn %llx l%u%s q%u%s %s%s" \ + trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s" \ " %snxe %sad root %u %s%c", \ - __entry->mmu_valid_gen, \ __entry->gfn, role.level, \ role.cr4_pae ? 
" pae" : "", \ role.quadrant, \ @@ -283,27 +280,6 @@ TRACE_EVENT( ); TRACE_EVENT( - kvm_mmu_invalidate_zap_all_pages, - TP_PROTO(struct kvm *kvm), - TP_ARGS(kvm), - - TP_STRUCT__entry( - __field(unsigned long, mmu_valid_gen) - __field(unsigned int, mmu_used_pages) - ), - - TP_fast_assign( - __entry->mmu_valid_gen = kvm->arch.mmu_valid_gen; - __entry->mmu_used_pages = kvm->arch.n_used_mmu_pages; - ), - - TP_printk("kvm-mmu-valid-gen %lx used_pages %x", - __entry->mmu_valid_gen, __entry->mmu_used_pages - ) -); - - -TRACE_EVENT( check_mmio_spte, TP_PROTO(u64 spte, unsigned int kvm_gen, unsigned int spte_gen), TP_ARGS(spte, kvm_gen, spte_gen), diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 3052a59a3065..fd04d462fdae 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -42,7 +42,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot, for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) { slot->arch.gfn_track[i] = kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!slot->arch.gfn_track[i]) goto track_free; } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f13a3a24d360..b5b128a0a051 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -145,7 +145,6 @@ struct kvm_svm { /* Struct members for AVIC */ u32 avic_vm_id; - u32 ldr_mode; struct page *avic_logical_id_table_page; struct page *avic_physical_id_table_page; struct hlist_node hnode; @@ -236,6 +235,7 @@ struct vcpu_svm { bool nrips_enabled : 1; u32 ldr_reg; + u32 dfr_reg; struct page *avic_backing_page; u64 *avic_physical_id_cache; bool avic_is_running; @@ -1795,9 +1795,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, /* Avoid using vmalloc for smaller buffers. */ size = npages * sizeof(struct page *); if (size > PAGE_SIZE) - pages = vmalloc(size); + pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO, + PAGE_KERNEL); else - pages = kmalloc(size, GFP_KERNEL); + pages = kmalloc(size, GFP_KERNEL_ACCOUNT); if (!pages) return NULL; @@ -1865,7 +1866,9 @@ static void __unregister_enc_region_locked(struct kvm *kvm, static struct kvm *svm_vm_alloc(void) { - struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm)); + struct kvm_svm *kvm_svm = __vmalloc(sizeof(struct kvm_svm), + GFP_KERNEL_ACCOUNT | __GFP_ZERO, + PAGE_KERNEL); return &kvm_svm->kvm; } @@ -1940,7 +1943,7 @@ static int avic_vm_init(struct kvm *kvm) return 0; /* Allocating physical APIC ID table (4KB) */ - p_page = alloc_page(GFP_KERNEL); + p_page = alloc_page(GFP_KERNEL_ACCOUNT); if (!p_page) goto free_avic; @@ -1948,7 +1951,7 @@ static int avic_vm_init(struct kvm *kvm) clear_page(page_address(p_page)); /* Allocating logical APIC ID table (4KB) */ - l_page = alloc_page(GFP_KERNEL); + l_page = alloc_page(GFP_KERNEL_ACCOUNT); if (!l_page) goto free_avic; @@ -2106,6 +2109,7 @@ static int avic_init_vcpu(struct vcpu_svm *svm) INIT_LIST_HEAD(&svm->ir_list); spin_lock_init(&svm->ir_list_lock); + svm->dfr_reg = APIC_DFR_FLAT; return ret; } @@ -2119,13 +2123,14 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) struct page *nested_msrpm_pages; int err; - svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); if (!svm) { err = -ENOMEM; goto out; } - svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL); + svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); if (!svm->vcpu.arch.guest_fpu) { printk(KERN_ERR "kvm: failed to 
allocate vcpu's fpu\n"); err = -ENOMEM; @@ -2137,19 +2142,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto free_svm; err = -ENOMEM; - page = alloc_page(GFP_KERNEL); + page = alloc_page(GFP_KERNEL_ACCOUNT); if (!page) goto uninit; - msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); if (!msrpm_pages) goto free_page1; - nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + nested_msrpm_pages = alloc_pages(GFP_KERNEL_ACCOUNT, MSRPM_ALLOC_ORDER); if (!nested_msrpm_pages) goto free_page2; - hsave_page = alloc_page(GFP_KERNEL); + hsave_page = alloc_page(GFP_KERNEL_ACCOUNT); if (!hsave_page) goto free_page3; @@ -4565,8 +4570,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) return &logical_apic_id_table[index]; } -static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr, - bool valid) +static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr) { bool flat; u32 *entry, new_entry; @@ -4579,31 +4583,39 @@ static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr, new_entry = READ_ONCE(*entry); new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); - if (valid) - new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; - else - new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK; + new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; WRITE_ONCE(*entry, new_entry); return 0; } +static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + bool flat = svm->dfr_reg == APIC_DFR_FLAT; + u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat); + + if (entry) + WRITE_ONCE(*entry, (u32) ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK); +} + static int avic_handle_ldr_update(struct kvm_vcpu *vcpu) { - int ret; + int ret = 0; struct vcpu_svm *svm = to_svm(vcpu); u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR); - if (!ldr) - return 1; + if (ldr == svm->ldr_reg) + return 0; - ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true); - if (ret && svm->ldr_reg) { - avic_ldr_write(vcpu, 0, svm->ldr_reg, false); - svm->ldr_reg = 0; - } else { + avic_invalidate_logical_id_entry(vcpu); + + if (ldr) + ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr); + + if (!ret) svm->ldr_reg = ldr; - } + return ret; } @@ -4637,27 +4649,16 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) return 0; } -static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) +static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); - u32 mod = (dfr >> 28) & 0xf; - /* - * We assume that all local APICs are using the same type. - * If this changes, we need to flush the AVIC logical - * APID id table. 
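
A sketch of the logical APIC ID table entry manipulation above. The bit positions are assumptions for illustration only: guest physical APIC ID in bits 0-7 and a valid flag in bit 31.

#include <stdint.h>
#include <stdio.h>

#define ENTRY_GUEST_PHYS_ID_MASK        0xFFu
#define ENTRY_VALID_MASK                (1u << 31)

static uint32_t ldr_entry_write(uint32_t entry, uint8_t g_phys_id)
{
        entry &= ~ENTRY_GUEST_PHYS_ID_MASK;
        entry |= g_phys_id & ENTRY_GUEST_PHYS_ID_MASK;
        entry |= ENTRY_VALID_MASK;              /* entry now routes interrupts */
        return entry;
}

static uint32_t ldr_entry_invalidate(void)
{
        /* mirrors the WRITE_ONCE(*entry, (u32)~VALID_MASK) idiom above */
        return (uint32_t)~ENTRY_VALID_MASK;
}

int main(void)
{
        uint32_t e = ldr_entry_write(0, 3);

        printf("valid entry: %#x, invalidated: %#x\n",
               (unsigned)e, (unsigned)ldr_entry_invalidate());
        return 0;
}
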
- */ - if (kvm_svm->ldr_mode == mod) - return 0; - - clear_page(page_address(kvm_svm->avic_logical_id_table_page)); - kvm_svm->ldr_mode = mod; + if (svm->dfr_reg == dfr) + return; - if (svm->ldr_reg) - avic_handle_ldr_update(vcpu); - return 0; + avic_invalidate_logical_id_entry(vcpu); + svm->dfr_reg = dfr; } static int avic_unaccel_trap_write(struct vcpu_svm *svm) @@ -5125,11 +5126,11 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - if (!kvm_vcpu_apicv_active(&svm->vcpu)) - return; - - vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; - mark_dirty(vmcb, VMCB_INTR); + if (kvm_vcpu_apicv_active(vcpu)) + vmcb->control.int_ctl |= AVIC_ENABLE_MASK; + else + vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; + mark_dirty(vmcb, VMCB_AVIC); } static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) @@ -5195,7 +5196,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * Allocating new amd_iommu_pi_data, which will get * add to the per-vcpu ir_list. */ - ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL); + ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); if (!ir) { ret = -ENOMEM; goto out; @@ -6163,8 +6164,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) { if (avic_handle_apic_id_update(vcpu) != 0) return; - if (avic_handle_dfr_update(vcpu) != 0) - return; + avic_handle_dfr_update(vcpu); avic_handle_ldr_update(vcpu); } @@ -6311,7 +6311,7 @@ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) if (ret) return ret; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); if (!data) return -ENOMEM; @@ -6361,7 +6361,7 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) return -EFAULT; - start = kzalloc(sizeof(*start), GFP_KERNEL); + start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT); if (!start) return -ENOMEM; @@ -6458,7 +6458,7 @@ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) return -EFAULT; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); if (!data) return -ENOMEM; @@ -6535,7 +6535,7 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) if (copy_from_user(¶ms, measure, sizeof(params))) return -EFAULT; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); if (!data) return -ENOMEM; @@ -6597,7 +6597,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); if (!data) return -ENOMEM; @@ -6618,7 +6618,7 @@ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!sev_guest(kvm)) return -ENOTTY; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); if (!data) return -ENOMEM; @@ -6646,7 +6646,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, struct sev_data_dbg *data; int ret; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); if (!data) return -ENOMEM; @@ -6901,7 +6901,7 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) } ret = -ENOMEM; - data = 
kzalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); if (!data) goto e_unpin_memory; @@ -7007,7 +7007,7 @@ static int svm_register_enc_region(struct kvm *kvm, if (range->addr > ULONG_MAX || range->size > ULONG_MAX) return -EINVAL; - region = kzalloc(sizeof(*region), GFP_KERNEL); + region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT); if (!region) return -ENOMEM; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d737a51a53ca..f24a2c225070 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -211,7 +211,6 @@ static void free_nested(struct kvm_vcpu *vcpu) if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; - hrtimer_cancel(&vmx->nested.preemption_timer); vmx->nested.vmxon = false; vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); @@ -274,6 +273,7 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) { vcpu_load(vcpu); + vmx_leave_nested(vcpu); vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01); free_nested(vcpu); vcpu_put(vcpu); @@ -1980,17 +1980,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) prepare_vmcs02_early_full(vmx, vmcs12); /* - * HOST_RSP is normally set correctly in vmx_vcpu_run() just before - * entry, but only if the current (host) sp changed from the value - * we wrote last (vmx->host_rsp). This cache is no longer relevant - * if we switch vmcs, and rather than hold a separate cache per vmcs, - * here we just force the write to happen on entry. host_rsp will - * also be written unconditionally by nested_vmx_check_vmentry_hw() - * if we are doing early consistency checks via hardware. - */ - vmx->host_rsp = 0; - - /* * PIN CONTROLS */ exec_control = vmcs12->pin_based_vm_exec_control; @@ -2289,10 +2278,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } vmx_set_rflags(vcpu, vmcs12->guest_rflags); - vmx->nested.preemption_timer_expired = false; - if (nested_cpu_has_preemption_timer(vmcs12)) - vmx_start_preemption_timer(vcpu); - /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the * bitwise-or of what L1 wants to trap for L2, and what we want to * trap. Note that CR0.TS also needs updating - we do this later. @@ -2722,6 +2707,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long cr3, cr4; + bool vm_fail; if (!nested_early_check) return 0; @@ -2755,29 +2741,34 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->host_state.cr4 = cr4; } - vmx->__launched = vmx->loaded_vmcs->launched; - asm( - /* Set HOST_RSP */ "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ - __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t" - "mov %%" _ASM_SP ", %c[host_rsp](%1)\n\t" + "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" + "je 1f \n\t" + __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t" + "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" + "1: \n\t" "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ /* Check if vmlaunch or vmresume is needed */ - "cmpl $0, %c[launched](%% " _ASM_CX")\n\t" + "cmpb $0, %c[launched](%[loaded_vmcs])\n\t" + /* + * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set + * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail + * Valid. vmx_vmenter() directly "returns" RFLAGS, and so the + * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail. 
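
Where the compiler supports asm flag outputs, the CC_SET()/CC_OUT() pair used above lets RFLAGS be consumed directly as an asm output instead of an explicit setcc into a register. A standalone sketch of that flag-output mechanism (x86 only, compiler with asm flag-output support assumed):

#include <stdbool.h>
#include <stdio.h>

static bool is_below_or_equal(unsigned long a, unsigned long b)
{
        bool be;

        /* "cmp b, a" sets CF/ZF the same way the VM-Enter path consumes them */
        asm("cmp %[b], %[a]"
            : "=@ccbe" (be)
            : [a] "r" (a), [b] "r" (b));
        return be;
}

int main(void)
{
        printf("%d %d %d\n",
               is_below_or_equal(1, 2),         /* below -> 1 */
               is_below_or_equal(2, 2),         /* equal -> 1 */
               is_below_or_equal(3, 2));        /* above -> 0 */
        return 0;
}
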
+ */ "call vmx_vmenter\n\t" - /* Set vmx->fail accordingly */ - "setbe %c[fail](%% " _ASM_CX")\n\t" - : ASM_CALL_CONSTRAINT - : "c"(vmx), "d"((unsigned long)HOST_RSP), - [launched]"i"(offsetof(struct vcpu_vmx, __launched)), - [fail]"i"(offsetof(struct vcpu_vmx, fail)), - [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), + CC_SET(be) + : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail) + : [HOST_RSP]"r"((unsigned long)HOST_RSP), + [loaded_vmcs]"r"(vmx->loaded_vmcs), + [launched]"i"(offsetof(struct loaded_vmcs, launched)), + [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), [wordsize]"i"(sizeof(ulong)) - : "rax", "cc", "memory" + : "cc", "memory" ); preempt_enable(); @@ -2787,10 +2778,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) if (vmx->msr_autoload.guest.nr) vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); - if (vmx->fail) { + if (vm_fail) { WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != VMXERR_ENTRY_INVALID_CONTROL_FIELD); - vmx->fail = 0; return 1; } @@ -2813,8 +2803,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) return 0; } -STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw); - static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12); @@ -3031,6 +3019,15 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) kvm_make_request(KVM_REQ_EVENT, vcpu); /* + * Do not start the preemption timer hrtimer until after we know + * we are successful, so that only nested_vmx_vmexit needs to cancel + * the timer. + */ + vmx->nested.preemption_timer_expired = false; + if (nested_cpu_has_preemption_timer(vmcs12)) + vmx_start_preemption_timer(vcpu); + + /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet * returned as far as L1 is concerned. 
It will only return (and set @@ -3450,13 +3447,10 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) else vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; - if (nested_cpu_has_preemption_timer(vmcs12)) { - if (vmcs12->vm_exit_controls & - VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) + if (nested_cpu_has_preemption_timer(vmcs12) && + vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) vmcs12->vmx_preemption_timer_value = vmx_get_preemption_timer_value(vcpu); - hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); - } /* * In some cases (usually, nested EPT), L2 is allowed to change its @@ -3864,6 +3858,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, leave_guest_mode(vcpu); + if (nested_cpu_has_preemption_timer(vmcs12)) + hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); + if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) vcpu->arch.tsc_offset -= vmcs12->tsc_offset; @@ -3915,9 +3912,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx_flush_tlb(vcpu, true); } - /* This is needed for same reason as it was needed in prepare_vmcs02 */ - vmx->host_rsp = 0; - /* Unpin physical memory we referred to in vmcs02 */ if (vmx->nested.apic_access_page) { kvm_release_page_dirty(vmx->nested.apic_access_page); @@ -4035,25 +4029,50 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, /* Addr = segment_base + offset */ /* offset = base + [index * scale] + displacement */ off = exit_qualification; /* holds the displacement */ + if (addr_size == 1) + off = (gva_t)sign_extend64(off, 31); + else if (addr_size == 0) + off = (gva_t)sign_extend64(off, 15); if (base_is_valid) off += kvm_register_read(vcpu, base_reg); if (index_is_valid) off += kvm_register_read(vcpu, index_reg)<<scaling; vmx_get_segment(vcpu, &s, seg_reg); - *ret = s.base + off; + /* + * The effective address, i.e. @off, of a memory operand is truncated + * based on the address size of the instruction. Note that this is + * the *effective address*, i.e. the address prior to accounting for + * the segment's base. + */ if (addr_size == 1) /* 32 bit */ - *ret &= 0xffffffff; + off &= 0xffffffff; + else if (addr_size == 0) /* 16 bit */ + off &= 0xffff; /* Checks for #GP/#SS exceptions. */ exn = false; if (is_long_mode(vcpu)) { + /* + * The virtual/linear address is never truncated in 64-bit + * mode, e.g. a 32-bit address size can yield a 64-bit virtual + * address when using FS/GS with a non-zero base. + */ + *ret = s.base + off; + /* Long mode: #GP(0)/#SS(0) if the memory address is in a * non-canonical form. This is the only check on the memory * destination for long mode! */ exn = is_noncanonical_address(*ret, vcpu); - } else if (is_protmode(vcpu)) { + } else { + /* + * When not in long mode, the virtual/linear address is + * unconditionally truncated to 32 bits regardless of the + * address size. + */ + *ret = (s.base + off) & 0xffffffff; + /* Protected mode: apply checks for segment validity in the * following order: * - segment type check (#GP(0) may be thrown) @@ -4077,10 +4096,16 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification, /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */ exn = (s.unusable != 0); - /* Protected mode: #GP(0)/#SS(0) if the memory - * operand is outside the segment limit. + + /* + * Protected mode: #GP(0)/#SS(0) if the memory operand is + * outside the segment limit. All CPUs that support VMX ignore + * limit checks for flat segments, i.e. 
segments with base==0, + * limit==0xffffffff and of type expand-up data or code. */ - exn = exn || (off + sizeof(u64) > s.limit); + if (!(s.base == 0 && s.limit == 0xffffffff && + ((s.type & 8) || !(s.type & 4)))) + exn = exn || (off + sizeof(u64) > s.limit); } if (exn) { kvm_queue_exception_e(vcpu, @@ -4145,11 +4170,11 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) if (r < 0) goto out_vmcs02; - vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); + vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); if (!vmx->nested.cached_vmcs12) goto out_cached_vmcs12; - vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL); + vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT); if (!vmx->nested.cached_shadow_vmcs12) goto out_cached_shadow_vmcs12; @@ -5696,6 +5721,10 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) { for (i = 0; i < VMX_BITMAP_NR; i++) { + /* + * The vmx_bitmap is not tied to a VM and so should + * not be charged to a memcg. + */ vmx_bitmap[i] = (unsigned long *) __get_free_page(GFP_KERNEL); if (!vmx_bitmap[i]) { diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 6def3ba88e3b..cb6079f8a227 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -34,6 +34,7 @@ struct vmcs_host_state { unsigned long cr4; /* May not match real cr4 */ unsigned long gs_base; unsigned long fs_base; + unsigned long rsp; u16 fs_sel, gs_sel, ldt_sel; #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index bcef2c7e9bc4..7b272738c576 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -1,6 +1,30 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/linkage.h> #include <asm/asm.h> +#include <asm/bitsperlong.h> +#include <asm/kvm_vcpu_regs.h> + +#define WORD_SIZE (BITS_PER_LONG / 8) + +#define VCPU_RAX __VCPU_REGS_RAX * WORD_SIZE +#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE +#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE +#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE +/* Intentionally omit RSP as it's context switched by hardware */ +#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE +#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE +#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE + +#ifdef CONFIG_X86_64 +#define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE +#define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE +#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE +#define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE +#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE +#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE +#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE +#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE +#endif .text @@ -55,3 +79,146 @@ ENDPROC(vmx_vmenter) ENTRY(vmx_vmexit) ret ENDPROC(vmx_vmexit) + +/** + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode + * @vmx: struct vcpu_vmx * + * @regs: unsigned long * (to guest registers) + * @launched: %true if the VMCS has been launched + * + * Returns: + * 0 on VM-Exit, 1 on VM-Fail + */ +ENTRY(__vmx_vcpu_run) + push %_ASM_BP + mov %_ASM_SP, %_ASM_BP +#ifdef CONFIG_X86_64 + push %r15 + push %r14 + push %r13 + push %r12 +#else + push %edi + push %esi +#endif + push %_ASM_BX + + /* + * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and + * @regs is needed after VM-Exit to save the guest's register values. + */ + push %_ASM_ARG2 + + /* Copy @launched to BL, _ASM_ARG3 is volatile. 
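
A sketch of the effective-address rules spelled out in get_vmx_mem_address() above: sign-extend the displacement to the instruction's address size, truncate the effective address (before the segment base) to that size, and truncate the final linear address to 32 bits outside long mode. The values in main() are illustrative.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static int64_t sign_extend64(uint64_t value, int sign_bit)
{
        int shift = 63 - sign_bit;

        return (int64_t)(value << shift) >> shift;
}

static uint64_t operand_linear_addr(uint64_t disp, uint64_t base,
                                    uint64_t index, int scale,
                                    uint64_t seg_base, int addr_size,
                                    bool long_mode)
{
        uint64_t off = disp;

        if (addr_size == 1)                     /* 32-bit address size */
                off = (uint64_t)sign_extend64(off, 31);
        else if (addr_size == 0)                /* 16-bit address size */
                off = (uint64_t)sign_extend64(off, 15);

        off += base + (index << scale);

        if (addr_size == 1)
                off &= 0xffffffff;
        else if (addr_size == 0)
                off &= 0xffff;

        /* the linear address is only truncated outside long mode */
        return long_mode ? seg_base + off : (seg_base + off) & 0xffffffff;
}

int main(void)
{
        /* 32-bit address size, negative displacement wraps within 4 GiB */
        printf("%#llx\n",
               (unsigned long long)operand_linear_addr(0xfffffff8, 0x1000,
                                                       2, 3, 0, 1, true));
        return 0;
}
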
*/ + mov %_ASM_ARG3B, %bl + + /* Adjust RSP to account for the CALL to vmx_vmenter(). */ + lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 + call vmx_update_host_rsp + + /* Load @regs to RAX. */ + mov (%_ASM_SP), %_ASM_AX + + /* Check if vmlaunch or vmresume is needed */ + cmpb $0, %bl + + /* Load guest registers. Don't clobber flags. */ + mov VCPU_RBX(%_ASM_AX), %_ASM_BX + mov VCPU_RCX(%_ASM_AX), %_ASM_CX + mov VCPU_RDX(%_ASM_AX), %_ASM_DX + mov VCPU_RSI(%_ASM_AX), %_ASM_SI + mov VCPU_RDI(%_ASM_AX), %_ASM_DI + mov VCPU_RBP(%_ASM_AX), %_ASM_BP +#ifdef CONFIG_X86_64 + mov VCPU_R8 (%_ASM_AX), %r8 + mov VCPU_R9 (%_ASM_AX), %r9 + mov VCPU_R10(%_ASM_AX), %r10 + mov VCPU_R11(%_ASM_AX), %r11 + mov VCPU_R12(%_ASM_AX), %r12 + mov VCPU_R13(%_ASM_AX), %r13 + mov VCPU_R14(%_ASM_AX), %r14 + mov VCPU_R15(%_ASM_AX), %r15 +#endif + /* Load guest RAX. This kills the vmx_vcpu pointer! */ + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + + /* Enter guest mode */ + call vmx_vmenter + + /* Jump on VM-Fail. */ + jbe 2f + + /* Temporarily save guest's RAX. */ + push %_ASM_AX + + /* Reload @regs to RAX. */ + mov WORD_SIZE(%_ASM_SP), %_ASM_AX + + /* Save all guest registers, including RAX from the stack */ + __ASM_SIZE(pop) VCPU_RAX(%_ASM_AX) + mov %_ASM_BX, VCPU_RBX(%_ASM_AX) + mov %_ASM_CX, VCPU_RCX(%_ASM_AX) + mov %_ASM_DX, VCPU_RDX(%_ASM_AX) + mov %_ASM_SI, VCPU_RSI(%_ASM_AX) + mov %_ASM_DI, VCPU_RDI(%_ASM_AX) + mov %_ASM_BP, VCPU_RBP(%_ASM_AX) +#ifdef CONFIG_X86_64 + mov %r8, VCPU_R8 (%_ASM_AX) + mov %r9, VCPU_R9 (%_ASM_AX) + mov %r10, VCPU_R10(%_ASM_AX) + mov %r11, VCPU_R11(%_ASM_AX) + mov %r12, VCPU_R12(%_ASM_AX) + mov %r13, VCPU_R13(%_ASM_AX) + mov %r14, VCPU_R14(%_ASM_AX) + mov %r15, VCPU_R15(%_ASM_AX) +#endif + + /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ + xor %eax, %eax + + /* + * Clear all general purpose registers except RSP and RAX to prevent + * speculative use of the guest's values, even those that are reloaded + * via the stack. In theory, an L1 cache miss when restoring registers + * could lead to speculative execution with the guest's values. + * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially + * free. RSP and RAX are exempt as RSP is restored by hardware during + * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + */ +1: xor %ebx, %ebx + xor %ecx, %ecx + xor %edx, %edx + xor %esi, %esi + xor %edi, %edi + xor %ebp, %ebp +#ifdef CONFIG_X86_64 + xor %r8d, %r8d + xor %r9d, %r9d + xor %r10d, %r10d + xor %r11d, %r11d + xor %r12d, %r12d + xor %r13d, %r13d + xor %r14d, %r14d + xor %r15d, %r15d +#endif + + /* "POP" @regs. */ + add $WORD_SIZE, %_ASM_SP + pop %_ASM_BX + +#ifdef CONFIG_X86_64 + pop %r12 + pop %r13 + pop %r14 + pop %r15 +#else + pop %esi + pop %edi +#endif + pop %_ASM_BP + ret + + /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ +2: mov $1, %eax + jmp 1b +ENDPROC(__vmx_vcpu_run) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 30a6bcd735ec..c73375e01ab8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -246,6 +246,10 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) if (l1tf != VMENTER_L1D_FLUSH_NEVER && !vmx_l1d_flush_pages && !boot_cpu_has(X86_FEATURE_FLUSH_L1D)) { + /* + * This allocation for vmx_l1d_flush_pages is not tied to a VM + * lifetime and so should not be charged to a memcg. 
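
The VCPU_<reg> offsets used by __vmx_vcpu_run() above index a flat array of unsigned long registers, so each offset is simply the register's index times the word size. A sketch of that offset arithmetic (the index values here are illustrative, not the kernel's __VCPU_REGS_* numbering):

#include <stdio.h>

enum { REG_RAX, REG_RCX, REG_RDX, REG_RBX, REG_RSP, REG_RBP, NR_REGS };

#define WORD_SIZE       sizeof(unsigned long)

int main(void)
{
        unsigned long regs[NR_REGS] = { 0 };

        /* the asm's "mov VCPU_RDX(%rax), %rdx" reads this slot */
        printf("VCPU_RDX offset = %zu bytes\n", REG_RDX * WORD_SIZE);

        regs[REG_RDX] = 0x1234;
        printf("value at base + offset: %#lx\n",
               *(unsigned long *)((char *)regs + REG_RDX * WORD_SIZE));
        return 0;
}
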
+ */ page = alloc_pages(GFP_KERNEL, L1D_CACHE_ORDER); if (!page) return -ENOMEM; @@ -2387,13 +2391,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, return 0; } -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu) +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) { int node = cpu_to_node(cpu); struct page *pages; struct vmcs *vmcs; - pages = __alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); + pages = __alloc_pages_node(node, flags, vmcs_config.order); if (!pages) return NULL; vmcs = page_address(pages); @@ -2440,7 +2444,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_init(loaded_vmcs); if (cpu_has_vmx_msr_bitmap()) { - loaded_vmcs->msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); + loaded_vmcs->msr_bitmap = (unsigned long *) + __get_free_page(GFP_KERNEL_ACCOUNT); if (!loaded_vmcs->msr_bitmap) goto out_vmcs; memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE); @@ -2481,7 +2486,7 @@ static __init int alloc_kvm_area(void) for_each_possible_cpu(cpu) { struct vmcs *vmcs; - vmcs = alloc_vmcs_cpu(false, cpu); + vmcs = alloc_vmcs_cpu(false, cpu, GFP_KERNEL); if (!vmcs) { free_kvm_area(); return -ENOMEM; @@ -6360,150 +6365,15 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->hv_timer_armed = false; } -static void __vmx_vcpu_run(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) { - unsigned long evmcs_rsp; - - vmx->__launched = vmx->loaded_vmcs->launched; - - evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? - (unsigned long)¤t_evmcs->host_rsp : 0; - - if (static_branch_unlikely(&vmx_l1d_should_flush)) - vmx_l1d_flush(vcpu); - - asm( - /* Store host registers */ - "push %%" _ASM_DX "; push %%" _ASM_BP ";" - "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */ - "push %%" _ASM_CX " \n\t" - "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ - "cmp %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t" - "je 1f \n\t" - "mov %%" _ASM_SP ", %c[host_rsp](%%" _ASM_CX ") \n\t" - /* Avoid VMWRITE when Enlightened VMCS is in use */ - "test %%" _ASM_SI ", %%" _ASM_SI " \n\t" - "jz 2f \n\t" - "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t" - "jmp 1f \n\t" - "2: \n\t" - __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t" - "1: \n\t" - "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ - - /* Reload cr2 if changed */ - "mov %c[cr2](%%" _ASM_CX "), %%" _ASM_AX " \n\t" - "mov %%cr2, %%" _ASM_DX " \n\t" - "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" - "je 3f \n\t" - "mov %%" _ASM_AX", %%cr2 \n\t" - "3: \n\t" - /* Check if vmlaunch or vmresume is needed */ - "cmpl $0, %c[launched](%%" _ASM_CX ") \n\t" - /* Load guest registers. Don't clobber flags. */ - "mov %c[rax](%%" _ASM_CX "), %%" _ASM_AX " \n\t" - "mov %c[rbx](%%" _ASM_CX "), %%" _ASM_BX " \n\t" - "mov %c[rdx](%%" _ASM_CX "), %%" _ASM_DX " \n\t" - "mov %c[rsi](%%" _ASM_CX "), %%" _ASM_SI " \n\t" - "mov %c[rdi](%%" _ASM_CX "), %%" _ASM_DI " \n\t" - "mov %c[rbp](%%" _ASM_CX "), %%" _ASM_BP " \n\t" -#ifdef CONFIG_X86_64 - "mov %c[r8](%%" _ASM_CX "), %%r8 \n\t" - "mov %c[r9](%%" _ASM_CX "), %%r9 \n\t" - "mov %c[r10](%%" _ASM_CX "), %%r10 \n\t" - "mov %c[r11](%%" _ASM_CX "), %%r11 \n\t" - "mov %c[r12](%%" _ASM_CX "), %%r12 \n\t" - "mov %c[r13](%%" _ASM_CX "), %%r13 \n\t" - "mov %c[r14](%%" _ASM_CX "), %%r14 \n\t" - "mov %c[r15](%%" _ASM_CX "), %%r15 \n\t" -#endif - /* Load guest RCX. This kills the vmx_vcpu pointer! 
*/ - "mov %c[rcx](%%" _ASM_CX "), %%" _ASM_CX " \n\t" - - /* Enter guest mode */ - "call vmx_vmenter\n\t" - - /* Save guest's RCX to the stack placeholder (see above) */ - "mov %%" _ASM_CX ", %c[wordsize](%%" _ASM_SP ") \n\t" - - /* Load host's RCX, i.e. the vmx_vcpu pointer */ - "pop %%" _ASM_CX " \n\t" - - /* Set vmx->fail based on EFLAGS.{CF,ZF} */ - "setbe %c[fail](%%" _ASM_CX ")\n\t" - - /* Save all guest registers, including RCX from the stack */ - "mov %%" _ASM_AX ", %c[rax](%%" _ASM_CX ") \n\t" - "mov %%" _ASM_BX ", %c[rbx](%%" _ASM_CX ") \n\t" - __ASM_SIZE(pop) " %c[rcx](%%" _ASM_CX ") \n\t" - "mov %%" _ASM_DX ", %c[rdx](%%" _ASM_CX ") \n\t" - "mov %%" _ASM_SI ", %c[rsi](%%" _ASM_CX ") \n\t" - "mov %%" _ASM_DI ", %c[rdi](%%" _ASM_CX ") \n\t" - "mov %%" _ASM_BP ", %c[rbp](%%" _ASM_CX ") \n\t" -#ifdef CONFIG_X86_64 - "mov %%r8, %c[r8](%%" _ASM_CX ") \n\t" - "mov %%r9, %c[r9](%%" _ASM_CX ") \n\t" - "mov %%r10, %c[r10](%%" _ASM_CX ") \n\t" - "mov %%r11, %c[r11](%%" _ASM_CX ") \n\t" - "mov %%r12, %c[r12](%%" _ASM_CX ") \n\t" - "mov %%r13, %c[r13](%%" _ASM_CX ") \n\t" - "mov %%r14, %c[r14](%%" _ASM_CX ") \n\t" - "mov %%r15, %c[r15](%%" _ASM_CX ") \n\t" - /* - * Clear host registers marked as clobbered to prevent - * speculative use. - */ - "xor %%r8d, %%r8d \n\t" - "xor %%r9d, %%r9d \n\t" - "xor %%r10d, %%r10d \n\t" - "xor %%r11d, %%r11d \n\t" - "xor %%r12d, %%r12d \n\t" - "xor %%r13d, %%r13d \n\t" - "xor %%r14d, %%r14d \n\t" - "xor %%r15d, %%r15d \n\t" -#endif - "mov %%cr2, %%" _ASM_AX " \n\t" - "mov %%" _ASM_AX ", %c[cr2](%%" _ASM_CX ") \n\t" - - "xor %%eax, %%eax \n\t" - "xor %%ebx, %%ebx \n\t" - "xor %%esi, %%esi \n\t" - "xor %%edi, %%edi \n\t" - "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" - : ASM_CALL_CONSTRAINT - : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp), - [launched]"i"(offsetof(struct vcpu_vmx, __launched)), - [fail]"i"(offsetof(struct vcpu_vmx, fail)), - [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), - [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])), - [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])), - [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])), - [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])), - [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])), - [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])), - [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])), -#ifdef CONFIG_X86_64 - [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])), - [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])), - [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])), - [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])), - [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])), - [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])), - [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), - [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), -#endif - [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), - [wordsize]"i"(sizeof(ulong)) - : "cc", "memory" -#ifdef CONFIG_X86_64 - , "rax", "rbx", "rdi" - , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" -#else - , "eax", "ebx", "edi" -#endif - ); + if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) { + vmx->loaded_vmcs->host_state.rsp = host_rsp; + vmcs_writel(HOST_RSP, host_rsp); + } } -STACK_FRAME_NON_STANDARD(__vmx_vcpu_run); + +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, 
bool launched); static void vmx_vcpu_run(struct kvm_vcpu *vcpu) { @@ -6572,7 +6442,16 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) */ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); - __vmx_vcpu_run(vcpu, vmx); + if (static_branch_unlikely(&vmx_l1d_should_flush)) + vmx_l1d_flush(vcpu); + + if (vcpu->arch.cr2 != read_cr2()) + write_cr2(vcpu->arch.cr2); + + vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, + vmx->loaded_vmcs->launched); + + vcpu->arch.cr2 = read_cr2(); /* * We do not use IBRS in the kernel. If this vCPU has used the @@ -6657,7 +6536,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) static struct kvm *vmx_vm_alloc(void) { - struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx)); + struct kvm_vmx *kvm_vmx = __vmalloc(sizeof(struct kvm_vmx), + GFP_KERNEL_ACCOUNT | __GFP_ZERO, + PAGE_KERNEL); return &kvm_vmx->kvm; } @@ -6673,7 +6554,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) if (enable_pml) vmx_destroy_pml_buffer(vmx); free_vpid(vmx->vpid); - leave_guest_mode(vcpu); nested_vmx_free_vcpu(vcpu); free_loaded_vmcs(vmx->loaded_vmcs); kfree(vmx->guest_msrs); @@ -6685,14 +6565,16 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) { int err; - struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + struct vcpu_vmx *vmx; unsigned long *msr_bitmap; int cpu; + vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT); if (!vmx) return ERR_PTR(-ENOMEM); - vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL); + vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, + GFP_KERNEL_ACCOUNT); if (!vmx->vcpu.arch.guest_fpu) { printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n"); err = -ENOMEM; @@ -6714,12 +6596,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) * for the guest, etc. 
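
vmx_update_host_rsp() above caches the last value written to HOST_RSP so that the relatively expensive VMWRITE is skipped whenever the host stack pointer has not moved between VM-Enters. A userspace sketch of that caching, with a counter standing in for the VMCS write:

#include <stdio.h>

static unsigned long cached_rsp;
static unsigned long nr_vmwrites;

static void update_host_rsp(unsigned long host_rsp)
{
        if (host_rsp != cached_rsp) {
                cached_rsp = host_rsp;
                nr_vmwrites++;          /* stands in for vmcs_writel(HOST_RSP, ...) */
        }
}

int main(void)
{
        int i;

        /* same stack pointer on every "VM-Enter": a single write */
        for (i = 0; i < 1000; i++)
                update_host_rsp(0x7ffc0000);

        update_host_rsp(0x7ffb0000);    /* e.g. after switching to another vmcs */

        printf("vmwrites: %lu\n", nr_vmwrites);         /* prints 2 */
        return 0;
}
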
*/ if (enable_pml) { - vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); + vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!vmx->pml_pg) goto uninit_vcpu; } - vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL); + vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0]) > PAGE_SIZE); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 0ac0a64c7790..1554cb45b393 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -175,7 +175,6 @@ struct nested_vmx { struct vcpu_vmx { struct kvm_vcpu vcpu; - unsigned long host_rsp; u8 fail; u8 msr_bitmap_mode; u32 exit_intr_info; @@ -209,7 +208,7 @@ struct vcpu_vmx { struct loaded_vmcs vmcs01; struct loaded_vmcs *loaded_vmcs; struct loaded_vmcs *loaded_cpu_state; - bool __launched; /* temporary, used in vmx_vcpu_run */ + struct msr_autoload { struct vmx_msrs guest; struct vmx_msrs host; @@ -339,8 +338,8 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) static inline void pi_set_sn(struct pi_desc *pi_desc) { - return set_bit(POSTED_INTR_SN, - (unsigned long *)&pi_desc->control); + set_bit(POSTED_INTR_SN, + (unsigned long *)&pi_desc->control); } static inline void pi_set_on(struct pi_desc *pi_desc) @@ -445,7 +444,8 @@ static inline u32 vmx_vmentry_ctrl(void) { u32 vmentry_ctrl = vmcs_config.vmentry_ctrl; if (pt_mode == PT_MODE_SYSTEM) - vmentry_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | VM_EXIT_CLEAR_IA32_RTIT_CTL); + vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | + VM_ENTRY_LOAD_IA32_RTIT_CTL); /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ return vmentry_ctrl & ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER); @@ -455,9 +455,10 @@ static inline u32 vmx_vmexit_ctrl(void) { u32 vmexit_ctrl = vmcs_config.vmexit_ctrl; if (pt_mode == PT_MODE_SYSTEM) - vmexit_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP | VM_ENTRY_LOAD_IA32_RTIT_CTL); + vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP | + VM_EXIT_CLEAR_IA32_RTIT_CTL); /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */ - return vmcs_config.vmexit_ctrl & + return vmexit_ctrl & ~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER); } @@ -478,7 +479,7 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) return &(to_vmx(vcpu)->pi_desc); } -struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu); +struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags); void free_vmcs(struct vmcs *vmcs); int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); @@ -487,7 +488,8 @@ void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs); static inline struct vmcs *alloc_vmcs(bool shadow) { - return alloc_vmcs_cpu(shadow, raw_smp_processor_id()); + return alloc_vmcs_cpu(shadow, raw_smp_processor_id(), + GFP_KERNEL_ACCOUNT); } u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 941f932373d0..65e4559eef2f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3879,7 +3879,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; if (!lapic_in_kernel(vcpu)) goto out; - u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL); + u.lapic = kzalloc(sizeof(struct kvm_lapic_state), + GFP_KERNEL_ACCOUNT); r = -ENOMEM; if (!u.lapic) @@ -4066,7 +4067,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_GET_XSAVE: { - u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL); + u.xsave = kzalloc(sizeof(struct 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 941f932373d0..65e4559eef2f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3879,7 +3879,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = -EINVAL;
 		if (!lapic_in_kernel(vcpu))
 			goto out;
-		u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
+		u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
+				GFP_KERNEL_ACCOUNT);
 
 		r = -ENOMEM;
 		if (!u.lapic)
@@ -4066,7 +4067,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_GET_XSAVE: {
-		u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
+		u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
 		r = -ENOMEM;
 		if (!u.xsave)
 			break;
@@ -4090,7 +4091,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_GET_XCRS: {
-		u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
+		u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
 		r = -ENOMEM;
 		if (!u.xcrs)
 			break;
@@ -7055,6 +7056,13 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
 
 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
 {
+	if (!lapic_in_kernel(vcpu)) {
+		WARN_ON_ONCE(vcpu->arch.apicv_active);
+		return;
+	}
+	if (!vcpu->arch.apicv_active)
+		return;
+
 	vcpu->arch.apicv_active = false;
 	kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
 }
@@ -9005,7 +9013,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	struct page *page;
 	int r;
 
-	vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
 	if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -9026,6 +9033,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		goto fail_free_pio_data;
 
 	if (irqchip_in_kernel(vcpu->kvm)) {
+		vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
 		r = kvm_create_lapic(vcpu);
 		if (r < 0)
 			goto fail_mmu_destroy;
@@ -9033,14 +9041,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		static_key_slow_inc(&kvm_no_apic_vcpu);
 
 	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
-				       GFP_KERNEL);
+				       GFP_KERNEL_ACCOUNT);
 	if (!vcpu->arch.mce_banks) {
 		r = -ENOMEM;
 		goto fail_free_lapic;
 	}
 	vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
-	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
+	if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
+				GFP_KERNEL_ACCOUNT)) {
 		r = -ENOMEM;
 		goto fail_free_mce_banks;
 	}
@@ -9104,7 +9113,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
-	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
@@ -9299,13 +9307,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 		slot->arch.rmap[i] =
 			kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
-				 GFP_KERNEL);
+				 GFP_KERNEL_ACCOUNT);
 		if (!slot->arch.rmap[i])
 			goto out_free;
 		if (i == 0)
 			continue;
 
-		linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL);
+		linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
 		if (!linfo)
 			goto out_free;
@@ -9348,13 +9356,13 @@ out_free:
 	return -ENOMEM;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
+void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
 {
 	/*
 	 * memslots->generation has been incremented.
 	 * mmio generation may have reached its maximum value.
 	 */
-	kvm_mmu_invalidate_mmio_sptes(kvm, slots);
+	kvm_mmu_invalidate_mmio_sptes(kvm, gen);
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -9462,7 +9470,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
-	kvm_mmu_invalidate_zap_all_pages(kvm);
+	kvm_mmu_zap_all(kvm);
 }
 
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
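Besides the GFP_KERNEL_ACCOUNT conversions, which charge these allocations to the caller's memory cgroup, the x86.c hunks above make kvm_vcpu_deactivate_apicv() a safe no-op when there is no in-kernel LAPIC or when APICv is already off, now that apicv_active is only initialized on the in-kernel-irqchip path. Below is a small userspace model of that guard; the struct and field names are invented for the sketch.

/*
 * Userspace model of the early-return guard added to
 * kvm_vcpu_deactivate_apicv(): bail out when the feature is absent or
 * already disabled, so the helper is idempotent from any call path.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct vcpu_model {
	bool has_in_kernel_lapic;
	bool apicv_active;
	unsigned int refresh_calls;   /* how often the expensive refresh ran */
};

static void refresh_exec_ctrl(struct vcpu_model *v)
{
	v->refresh_calls++;           /* stands in for refresh_apicv_exec_ctrl() */
}

static void deactivate_apicv(struct vcpu_model *v)
{
	if (!v->has_in_kernel_lapic) {
		assert(!v->apicv_active);   /* mirrors the WARN_ON_ONCE() */
		return;
	}
	if (!v->apicv_active)
		return;                     /* already off: nothing to do */

	v->apicv_active = false;
	refresh_exec_ctrl(v);
}

int main(void)
{
	struct vcpu_model v = { .has_in_kernel_lapic = true, .apicv_active = true };

	deactivate_apicv(&v);   /* does the real work once */
	deactivate_apicv(&v);   /* second call is a no-op */
	printf("refresh calls: %u\n", v.refresh_calls);   /* prints 1 */
	return 0;
}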
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 224cd0a47568..28406aa1136d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -181,6 +181,11 @@ static inline bool emul_is_noncanonical_address(u64 la,
 static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 					gva_t gva, gfn_t gfn, unsigned access)
 {
+	u64 gen = kvm_memslots(vcpu->kvm)->generation;
+
+	if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS))
+		return;
+
 	/*
 	 * If this is a shadow nested page table, the "GVA" is
 	 * actually a nGPA.
@@ -188,7 +193,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 	vcpu->arch.mmio_gva = mmu_is_nested(vcpu) ? 0 : gva & PAGE_MASK;
 	vcpu->arch.access = access;
 	vcpu->arch.mmio_gfn = gfn;
-	vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
+	vcpu->arch.mmio_gen = gen;
 }
 
 static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
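The x86.h change reads the memslot generation once and refuses to populate the MMIO cache while the update-in-progress flag is set, so a translation can never be cached against a transient generation; vcpu_match_mmio_gen() later compares the stored value against the current one. A self-contained model of the idea follows; the flag bit, structure, and function names are placeholders for the sketch.

/*
 * Minimal model of generation-gated caching: nothing is cached while a
 * memslot update is in flight, and a cached entry is only a hit if its
 * generation still matches.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GEN_UPDATE_IN_PROGRESS  (1ull << 0)   /* placeholder flag bit */

struct mmio_cache {
	uint64_t gen;    /* generation the entry was cached under */
	uint64_t gfn;
	bool     valid;
};

static uint64_t memslots_generation;

static void cache_mmio_info(struct mmio_cache *c, uint64_t gfn)
{
	uint64_t gen = memslots_generation;

	if (gen & GEN_UPDATE_IN_PROGRESS)
		return;                 /* never cache against a transient gen */

	c->gfn = gfn;
	c->gen = gen;
	c->valid = true;
}

static bool mmio_cache_hit(const struct mmio_cache *c)
{
	return c->valid && c->gen == memslots_generation;
}

int main(void)
{
	struct mmio_cache c = { 0 };

	memslots_generation = 4 | GEN_UPDATE_IN_PROGRESS;
	cache_mmio_info(&c, 0x42);                   /* refused: update in flight */
	printf("cached during update: %d\n", c.valid);

	memslots_generation = 6;                     /* update finished, new gen */
	cache_mmio_info(&c, 0x42);
	printf("hit after update: %d\n", mmio_cache_hit(&c));
	return 0;
}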