summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm/book3s_hv.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-07 04:38:31 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-07 04:38:31 +0300
commitc136b84393d4e340e1b53fc7f737dd5827b19ee5 (patch)
tree985a1bdfafe7ec5ce2d3c738f601cad3998d8ce9 /arch/powerpc/kvm/book3s_hv.c
parente0f25a3f2d052e36ff67a9b4db835c3e27e950d8 (diff)
parent1372324b328cd5dabaef5e345e37ad48c63df2a9 (diff)
downloadlinux-c136b84393d4e340e1b53fc7f737dd5827b19ee5.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "PPC: - Better machine check handling for HV KVM - Ability to support guests with threads=2, 4 or 8 on POWER9 - Fix for a race that could cause delayed recognition of signals - Fix for a bug where POWER9 guests could sleep with interrupts pending. ARM: - VCPU request overhaul - allow timer and PMU to have their interrupt number selected from userspace - workaround for Cavium erratum 30115 - handling of memory poisonning - the usual crop of fixes and cleanups s390: - initial machine check forwarding - migration support for the CMMA page hinting information - cleanups and fixes x86: - nested VMX bugfixes and improvements - more reliable NMI window detection on AMD - APIC timer optimizations Generic: - VCPU request overhaul + documentation of common code patterns - kvm_stat improvements" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits) Update my email address kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12 kvm: x86: mmu: allow A/D bits to be disabled in an mmu x86: kvm: mmu: make spte mmio mask more explicit x86: kvm: mmu: dead code thanks to access tracking KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code KVM: PPC: Book3S HV: Close race with testing for signals on guest entry KVM: PPC: Book3S HV: Simplify dynamic micro-threading code KVM: x86: remove ignored type attribute KVM: LAPIC: Fix lapic timer injection delay KVM: lapic: reorganize restart_apic_timer KVM: lapic: reorganize start_hv_timer kvm: nVMX: Check memory operand to INVVPID KVM: s390: Inject machine check into the nested guest KVM: s390: Inject machine check into the guest tools/kvm_stat: add new interactive command 'b' tools/kvm_stat: add new command line switch '-i' tools/kvm_stat: fix error on interactive command 'g' KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit ...
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r--arch/powerpc/kvm/book3s_hv.c511
1 files changed, 399 insertions, 112 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 773b35d16a0b..0b436df746fc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -46,6 +46,8 @@
#include <linux/of.h>
#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -645,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
unsigned long stolen;
unsigned long core_stolen;
u64 now;
+ unsigned long flags;
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
@@ -652,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
core_stolen = vcore_stolen_time(vc, now);
stolen = core_stolen - vcpu->arch.stolen_logged;
vcpu->arch.stolen_logged = core_stolen;
- spin_lock_irq(&vcpu->arch.tbacct_lock);
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
stolen += vcpu->arch.busy_stolen;
vcpu->arch.busy_stolen = 0;
- spin_unlock_irq(&vcpu->arch.tbacct_lock);
+ spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
if (!dt || !vpa)
return;
memset(dt, 0, sizeof(struct dtl_entry));
@@ -675,6 +678,26 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.dtl.dirty = true;
}
+/* See if there is a doorbell interrupt pending for a vcpu */
+static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
+{
+ int thr;
+ struct kvmppc_vcore *vc;
+
+ if (vcpu->arch.doorbell_request)
+ return true;
+ /*
+ * Ensure that the read of vcore->dpdes comes after the read
+ * of vcpu->doorbell_request. This barrier matches the
+ * lwsync in book3s_hv_rmhandlers.S just before the
+ * fast_guest_return label.
+ */
+ smp_rmb();
+ vc = vcpu->arch.vcore;
+ thr = vcpu->vcpu_id - vc->first_vcpuid;
+ return !!(vc->dpdes & (1 << thr));
+}
+
static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
@@ -926,6 +949,101 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run,
}
}
+static void do_nothing(void *x)
+{
+}
+
+static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
+{
+ int thr, cpu, pcpu, nthreads;
+ struct kvm_vcpu *v;
+ unsigned long dpdes;
+
+ nthreads = vcpu->kvm->arch.emul_smt_mode;
+ dpdes = 0;
+ cpu = vcpu->vcpu_id & ~(nthreads - 1);
+ for (thr = 0; thr < nthreads; ++thr, ++cpu) {
+ v = kvmppc_find_vcpu(vcpu->kvm, cpu);
+ if (!v)
+ continue;
+ /*
+ * If the vcpu is currently running on a physical cpu thread,
+ * interrupt it in order to pull it out of the guest briefly,
+ * which will update its vcore->dpdes value.
+ */
+ pcpu = READ_ONCE(v->cpu);
+ if (pcpu >= 0)
+ smp_call_function_single(pcpu, do_nothing, NULL, 1);
+ if (kvmppc_doorbell_pending(v))
+ dpdes |= 1 << thr;
+ }
+ return dpdes;
+}
+
+/*
+ * On POWER9, emulate doorbell-related instructions in order to
+ * give the guest the illusion of running on a multi-threaded core.
+ * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
+ * and mfspr DPDES.
+ */
+static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
+{
+ u32 inst, rb, thr;
+ unsigned long arg;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tvcpu;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return EMULATE_FAIL;
+ if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
+ return RESUME_GUEST;
+ if (get_op(inst) != 31)
+ return EMULATE_FAIL;
+ rb = get_rb(inst);
+ thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
+ switch (get_xop(inst)) {
+ case OP_31_XOP_MSGSNDP:
+ arg = kvmppc_get_gpr(vcpu, rb);
+ if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+ break;
+ arg &= 0x3f;
+ if (arg >= kvm->arch.emul_smt_mode)
+ break;
+ tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
+ if (!tvcpu)
+ break;
+ if (!tvcpu->arch.doorbell_request) {
+ tvcpu->arch.doorbell_request = 1;
+ kvmppc_fast_vcpu_kick_hv(tvcpu);
+ }
+ break;
+ case OP_31_XOP_MSGCLRP:
+ arg = kvmppc_get_gpr(vcpu, rb);
+ if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+ break;
+ vcpu->arch.vcore->dpdes = 0;
+ vcpu->arch.doorbell_request = 0;
+ break;
+ case OP_31_XOP_MFSPR:
+ switch (get_sprn(inst)) {
+ case SPRN_TIR:
+ arg = thr;
+ break;
+ case SPRN_DPDES:
+ arg = kvmppc_read_dpdes(vcpu);
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+ kvmppc_set_gpr(vcpu, get_rt(inst), arg);
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+ kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+ return RESUME_GUEST;
+}
+
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -971,15 +1089,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
- /*
- * Deliver a machine check interrupt to the guest.
- * We have to do this, even if the host has handled the
- * machine check, because machine checks use SRR0/1 and
- * the interrupt might have trashed guest state in them.
- */
- kvmppc_book3s_queue_irqprio(vcpu,
- BOOK3S_INTERRUPT_MACHINE_CHECK);
- r = RESUME_GUEST;
+ /* Exit to guest with KVM_EXIT_NMI as exit reason */
+ run->exit_reason = KVM_EXIT_NMI;
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ /* Clear out the old NMI status from run->flags */
+ run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+ /* Now set the NMI status */
+ if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+ run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+ else
+ run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+
+ r = RESUME_HOST;
+ /* Print the MCE event to host console. */
+ machine_check_print_event_info(&vcpu->arch.mce_evt, false);
break;
case BOOK3S_INTERRUPT_PROGRAM:
{
@@ -1048,12 +1171,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
/*
* This occurs if the guest (kernel or userspace), does something that
- * is prohibited by HFSCR. We just generate a program interrupt to
- * the guest.
+ * is prohibited by HFSCR.
+ * On POWER9, this could be a doorbell instruction that we need
+ * to emulate.
+ * Otherwise, we just generate a program interrupt to the guest.
*/
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- r = RESUME_GUEST;
+ r = EMULATE_FAIL;
+ if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+ r = kvmppc_emulate_doorbell_instr(vcpu);
+ if (r == EMULATE_FAIL) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ r = RESUME_GUEST;
+ }
break;
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
@@ -1143,6 +1273,12 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
if (cpu_has_feature(CPU_FTR_ARCH_207S))
mask |= LPCR_AIL;
+ /*
+ * On POWER9, allow userspace to enable large decrementer for the
+ * guest, whether or not the host has it enabled.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ mask |= LPCR_LD;
/* Broken 32-bit version of LPCR must not clear top bits */
if (preserve_top32)
@@ -1611,7 +1747,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
- vcore->first_vcpuid = core * threads_per_vcore();
+ vcore->first_vcpuid = core * kvm->arch.smt_mode;
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@@ -1770,14 +1906,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu;
- int err = -EINVAL;
+ int err;
int core;
struct kvmppc_vcore *vcore;
- core = id / threads_per_vcore();
- if (core >= KVM_MAX_VCORES)
- goto out;
-
err = -ENOMEM;
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
@@ -1808,6 +1940,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
vcpu->arch.busy_preempt = TB_NIL;
vcpu->arch.intr_msr = MSR_SF | MSR_ME;
+ /*
+ * Set the default HFSCR for the guest from the host value.
+ * This value is only used on POWER9.
+ * On POWER9 DD1, TM doesn't work, so we make sure to
+ * prevent the guest from using it.
+ * On POWER9, we want to virtualize the doorbell facility, so we
+ * turn off the HFSCR bit, which causes those instructions to trap.
+ */
+ vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+ if (!cpu_has_feature(CPU_FTR_TM))
+ vcpu->arch.hfscr &= ~HFSCR_TM;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ vcpu->arch.hfscr &= ~HFSCR_MSGP;
+
kvmppc_mmu_book3s_hv_init(vcpu);
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -1815,11 +1961,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
init_waitqueue_head(&vcpu->arch.cpu_run);
mutex_lock(&kvm->lock);
- vcore = kvm->arch.vcores[core];
- if (!vcore) {
- vcore = kvmppc_vcore_create(kvm, core);
- kvm->arch.vcores[core] = vcore;
- kvm->arch.online_vcores++;
+ vcore = NULL;
+ err = -EINVAL;
+ core = id / kvm->arch.smt_mode;
+ if (core < KVM_MAX_VCORES) {
+ vcore = kvm->arch.vcores[core];
+ if (!vcore) {
+ err = -ENOMEM;
+ vcore = kvmppc_vcore_create(kvm, core);
+ kvm->arch.vcores[core] = vcore;
+ kvm->arch.online_vcores++;
+ }
}
mutex_unlock(&kvm->lock);
@@ -1847,6 +1999,43 @@ out:
return ERR_PTR(err);
}
+static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
+ unsigned long flags)
+{
+ int err;
+ int esmt = 0;
+
+ if (flags)
+ return -EINVAL;
+ if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
+ return -EINVAL;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * On POWER8 (or POWER7), the threading mode is "strict",
+ * so we pack smt_mode vcpus per vcore.
+ */
+ if (smt_mode > threads_per_subcore)
+ return -EINVAL;
+ } else {
+ /*
+ * On POWER9, the threading mode is "loose",
+ * so each vcpu gets its own vcore.
+ */
+ esmt = smt_mode;
+ smt_mode = 1;
+ }
+ mutex_lock(&kvm->lock);
+ err = -EBUSY;
+ if (!kvm->arch.online_vcores) {
+ kvm->arch.smt_mode = smt_mode;
+ kvm->arch.emul_smt_mode = esmt;
+ err = 0;
+ }
+ mutex_unlock(&kvm->lock);
+
+ return err;
+}
+
static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
if (vpa->pinned_addr)
@@ -1897,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
}
}
-extern void __kvmppc_vcore_entry(void);
+extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
@@ -1962,10 +2151,6 @@ static void kvmppc_release_hwthread(int cpu)
tpaca->kvm_hstate.kvm_split_mode = NULL;
}
-static void do_nothing(void *x)
-{
-}
-
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
{
int i;
@@ -1983,11 +2168,35 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
smp_call_function_single(cpu + i, do_nothing, NULL, 1);
}
+static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ /*
+ * With radix, the guest can do TLB invalidations itself,
+ * and it could choose to use the local form (tlbiel) if
+ * it is invalidating a translation that has only ever been
+ * used on one vcpu. However, that doesn't mean it has
+ * only ever been used on one physical cpu, since vcpus
+ * can move around between pcpus. To cope with this, when
+ * a vcpu moves from one pcpu to another, we need to tell
+ * any vcpus running on the same core as this vcpu previously
+ * ran to flush the TLB. The TLB is shared between threads,
+ * so we use a single bit in .need_tlb_flush for all 4 threads.
+ */
+ if (vcpu->arch.prev_cpu != pcpu) {
+ if (vcpu->arch.prev_cpu >= 0 &&
+ cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+ cpu_first_thread_sibling(pcpu))
+ radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
+ vcpu->arch.prev_cpu = pcpu;
+ }
+}
+
static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
struct paca_struct *tpaca;
- struct kvmppc_vcore *mvc = vc->master_vcore;
struct kvm *kvm = vc->kvm;
cpu = vc->pcpu;
@@ -1997,36 +2206,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
vcpu->arch.timer_running = 0;
}
cpu += vcpu->arch.ptid;
- vcpu->cpu = mvc->pcpu;
+ vcpu->cpu = vc->pcpu;
vcpu->arch.thread_cpu = cpu;
-
- /*
- * With radix, the guest can do TLB invalidations itself,
- * and it could choose to use the local form (tlbiel) if
- * it is invalidating a translation that has only ever been
- * used on one vcpu. However, that doesn't mean it has
- * only ever been used on one physical cpu, since vcpus
- * can move around between pcpus. To cope with this, when
- * a vcpu moves from one pcpu to another, we need to tell
- * any vcpus running on the same core as this vcpu previously
- * ran to flush the TLB. The TLB is shared between threads,
- * so we use a single bit in .need_tlb_flush for all 4 threads.
- */
- if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
- if (vcpu->arch.prev_cpu >= 0 &&
- cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
- cpu_first_thread_sibling(cpu))
- radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
- vcpu->arch.prev_cpu = cpu;
- }
cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
}
tpaca = &paca[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
- tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+ tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
smp_wmb();
- tpaca->kvm_hstate.kvm_vcore = mvc;
+ tpaca->kvm_hstate.kvm_vcore = vc;
if (cpu != smp_processor_id())
kvmppc_ipi_thread(cpu);
}
@@ -2155,8 +2344,7 @@ struct core_info {
int max_subcore_threads;
int total_threads;
int subcore_threads[MAX_SUBCORES];
- struct kvm *subcore_vm[MAX_SUBCORES];
- struct list_head vcs[MAX_SUBCORES];
+ struct kvmppc_vcore *vc[MAX_SUBCORES];
};
/*
@@ -2167,17 +2355,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
{
- int sub;
-
memset(cip, 0, sizeof(*cip));
cip->n_subcores = 1;
cip->max_subcore_threads = vc->num_threads;
cip->total_threads = vc->num_threads;
cip->subcore_threads[0] = vc->num_threads;
- cip->subcore_vm[0] = vc->kvm;
- for (sub = 0; sub < MAX_SUBCORES; ++sub)
- INIT_LIST_HEAD(&cip->vcs[sub]);
- list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+ cip->vc[0] = vc;
}
static bool subcore_config_ok(int n_subcores, int n_threads)
@@ -2197,9 +2380,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads)
return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
}
-static void init_master_vcore(struct kvmppc_vcore *vc)
+static void init_vcore_to_run(struct kvmppc_vcore *vc)
{
- vc->master_vcore = vc;
vc->entry_exit_map = 0;
vc->in_guest = 0;
vc->napping_threads = 0;
@@ -2224,9 +2406,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
++cip->n_subcores;
cip->total_threads += vc->num_threads;
cip->subcore_threads[sub] = vc->num_threads;
- cip->subcore_vm[sub] = vc->kvm;
- init_master_vcore(vc);
- list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
+ cip->vc[sub] = vc;
+ init_vcore_to_run(vc);
+ list_del_init(&vc->preempt_list);
return true;
}
@@ -2294,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
spin_unlock(&lp->lock);
}
+static bool recheck_signals(struct core_info *cip)
+{
+ int sub, i;
+ struct kvm_vcpu *vcpu;
+
+ for (sub = 0; sub < cip->n_subcores; ++sub)
+ for_each_runnable_thread(i, vcpu, cip->vc[sub])
+ if (signal_pending(vcpu->arch.run_task))
+ return true;
+ return false;
+}
+
static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
{
int still_running = 0, i;
@@ -2331,7 +2525,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
wake_up(&vcpu->arch.cpu_run);
}
}
- list_del_init(&vc->preempt_list);
if (!is_master) {
if (still_running > 0) {
kvmppc_vcore_preempt(vc);
@@ -2393,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu)
return 0;
}
+static void set_irq_happened(int trap)
+{
+ switch (trap) {
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ local_paca->irq_happened |= PACA_IRQ_EE;
+ break;
+ case BOOK3S_INTERRUPT_H_DOORBELL:
+ local_paca->irq_happened |= PACA_IRQ_DBELL;
+ break;
+ case BOOK3S_INTERRUPT_HMI:
+ local_paca->irq_happened |= PACA_IRQ_HMI;
+ break;
+ }
+}
+
/*
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
@@ -2403,7 +2611,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int i;
int srcu_idx;
struct core_info core_info;
- struct kvmppc_vcore *pvc, *vcnext;
+ struct kvmppc_vcore *pvc;
struct kvm_split_mode split_info, *sip;
int split, subcore_size, active;
int sub;
@@ -2412,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int pcpu, thr;
int target_threads;
int controlled_threads;
+ int trap;
/*
* Remove from the list any threads that have a signal pending
@@ -2426,7 +2635,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
/*
* Initialize *vc.
*/
- init_master_vcore(vc);
+ init_vcore_to_run(vc);
vc->preempt_tb = TB_NIL;
/*
@@ -2463,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
if (vc->num_threads < target_threads)
collect_piggybacks(&core_info, target_threads);
+ /*
+ * On radix, arrange for TLB flushing if necessary.
+ * This has to be done before disabling interrupts since
+ * it uses smp_call_function().
+ */
+ pcpu = smp_processor_id();
+ if (kvm_is_radix(vc->kvm)) {
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ for_each_runnable_thread(i, vcpu, core_info.vc[sub])
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+ }
+
+ /*
+ * Hard-disable interrupts, and check resched flag and signals.
+ * If we need to reschedule or deliver a signal, clean up
+ * and return without going into the guest(s).
+ */
+ local_irq_disable();
+ hard_irq_disable();
+ if (lazy_irq_pending() || need_resched() ||
+ recheck_signals(&core_info)) {
+ local_irq_enable();
+ vc->vcore_state = VCORE_INACTIVE;
+ /* Unlock all except the primary vcore */
+ for (sub = 1; sub < core_info.n_subcores; ++sub) {
+ pvc = core_info.vc[sub];
+ /* Put back on to the preempted vcores list */
+ kvmppc_vcore_preempt(pvc);
+ spin_unlock(&pvc->lock);
+ }
+ for (i = 0; i < controlled_threads; ++i)
+ kvmppc_release_hwthread(pcpu + i);
+ return;
+ }
+
+ kvmppc_clear_host_core(pcpu);
+
/* Decide on micro-threading (split-core) mode */
subcore_size = threads_per_subcore;
cmd_bit = stat_bit = 0;
@@ -2486,13 +2732,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
split_info.ldbar = mfspr(SPRN_LDBAR);
split_info.subcore_size = subcore_size;
for (sub = 0; sub < core_info.n_subcores; ++sub)
- split_info.master_vcs[sub] =
- list_first_entry(&core_info.vcs[sub],
- struct kvmppc_vcore, preempt_list);
+ split_info.vc[sub] = core_info.vc[sub];
/* order writes to split_info before kvm_split_mode pointer */
smp_wmb();
}
- pcpu = smp_processor_id();
for (thr = 0; thr < controlled_threads; ++thr)
paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
@@ -2512,32 +2755,29 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
- kvmppc_clear_host_core(pcpu);
-
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
thr = subcore_thread_map[sub];
thr0_done = false;
active |= 1 << thr;
- list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
- pvc->pcpu = pcpu + thr;
- for_each_runnable_thread(i, vcpu, pvc) {
- kvmppc_start_thread(vcpu, pvc);
- kvmppc_create_dtl_entry(vcpu, pvc);
- trace_kvm_guest_enter(vcpu);
- if (!vcpu->arch.ptid)
- thr0_done = true;
- active |= 1 << (thr + vcpu->arch.ptid);
- }
- /*
- * We need to start the first thread of each subcore
- * even if it doesn't have a vcpu.
- */
- if (pvc->master_vcore == pvc && !thr0_done)
- kvmppc_start_thread(NULL, pvc);
- thr += pvc->num_threads;
+ pvc = core_info.vc[sub];
+ pvc->pcpu = pcpu + thr;
+ for_each_runnable_thread(i, vcpu, pvc) {
+ kvmppc_start_thread(vcpu, pvc);
+ kvmppc_create_dtl_entry(vcpu, pvc);
+ trace_kvm_guest_enter(vcpu);
+ if (!vcpu->arch.ptid)
+ thr0_done = true;
+ active |= 1 << (thr + vcpu->arch.ptid);
}
+ /*
+ * We need to start the first thread of each subcore
+ * even if it doesn't have a vcpu.
+ */
+ if (!thr0_done)
+ kvmppc_start_thread(NULL, pvc);
+ thr += pvc->num_threads;
}
/*
@@ -2564,17 +2804,27 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 0);
for (sub = 0; sub < core_info.n_subcores; ++sub)
- list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
- spin_unlock(&pvc->lock);
+ spin_unlock(&core_info.vc[sub]->lock);
+
+ /*
+ * Interrupts will be enabled once we get into the guest,
+ * so tell lockdep that we're about to enable interrupts.
+ */
+ trace_hardirqs_on();
guest_enter();
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
- __kvmppc_vcore_entry();
+ trap = __kvmppc_vcore_entry();
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+ guest_exit();
+
+ trace_hardirqs_off();
+ set_irq_happened(trap);
+
spin_lock(&vc->lock);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
@@ -2602,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
split_info.do_nap = 0;
}
+ kvmppc_set_host_core(pcpu);
+
+ local_irq_enable();
+
/* Let secondaries go back to the offline loop */
for (i = 0; i < controlled_threads; ++i) {
kvmppc_release_hwthread(pcpu + i);
@@ -2610,18 +2864,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
}
- kvmppc_set_host_core(pcpu);
-
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
- guest_exit();
- for (sub = 0; sub < core_info.n_subcores; ++sub)
- list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
- preempt_list)
- post_guest_process(pvc, pvc == vc);
+ for (sub = 0; sub < core_info.n_subcores; ++sub) {
+ pvc = core_info.vc[sub];
+ post_guest_process(pvc, pvc == vc);
+ }
spin_lock(&vc->lock);
preempt_enable();
@@ -2666,6 +2917,30 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
vc->halt_poll_ns /= halt_poll_ns_shrink;
}
+#ifdef CONFIG_KVM_XICS
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+ if (!xive_enabled())
+ return false;
+ return vcpu->arch.xive_saved_state.pipr <
+ vcpu->arch.xive_saved_state.cppr;
+}
+#else
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+#endif /* CONFIG_KVM_XICS */
+
+static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
+ kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
+ return true;
+
+ return false;
+}
+
/*
* Check to see if any of the runnable vcpus on the vcore have pending
* exceptions or are no longer ceded
@@ -2676,8 +2951,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
int i;
for_each_runnable_thread(i, vcpu, vc) {
- if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
- vcpu->arch.prodded)
+ if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
return 1;
}
@@ -2819,15 +3093,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
*/
if (!signal_pending(current)) {
if (vc->vcore_state == VCORE_PIGGYBACK) {
- struct kvmppc_vcore *mvc = vc->master_vcore;
- if (spin_trylock(&mvc->lock)) {
- if (mvc->vcore_state == VCORE_RUNNING &&
- !VCORE_IS_EXITING(mvc)) {
+ if (spin_trylock(&vc->lock)) {
+ if (vc->vcore_state == VCORE_RUNNING &&
+ !VCORE_IS_EXITING(vc)) {
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
}
- spin_unlock(&mvc->lock);
+ spin_unlock(&vc->lock);
}
} else if (vc->vcore_state == VCORE_RUNNING &&
!VCORE_IS_EXITING(vc)) {
@@ -2863,7 +3136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
break;
n_ceded = 0;
for_each_runnable_thread(i, v, vc) {
- if (!v->arch.pending_exceptions && !v->arch.prodded)
+ if (!kvmppc_vcpu_woken(v))
n_ceded += v->arch.ceded;
else
v->arch.ceded = 0;
@@ -3519,6 +3792,19 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvm_hv_vm_activated();
/*
+ * Initialize smt_mode depending on processor.
+ * POWER8 and earlier have to use "strict" threading, where
+ * all vCPUs in a vcore have to run on the same (sub)core,
+ * whereas on POWER9 the threads can each run a different
+ * guest.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.smt_mode = threads_per_subcore;
+ else
+ kvm->arch.smt_mode = 1;
+ kvm->arch.emul_smt_mode = 1;
+
+ /*
* Create a debugfs directory for the VM
*/
snprintf(buf, sizeof(buf), "vm%d", current->pid);
@@ -3947,6 +4233,7 @@ static struct kvmppc_ops kvm_ops_hv = {
#endif
.configure_mmu = kvmhv_configure_mmu,
.get_rmmu_info = kvmhv_get_rmmu_info,
+ .set_smt_mode = kvmhv_set_smt_mode,
};
static int kvm_init_subcore_bitmap(void)